def fit(hyperparameter):
    """Train an MLP with the given hyperparameters and return its test accuracy.

    Relies on names defined outside this function: ``MLP``, ``device``,
    ``train_data``, ``test_data``, ``FLAGS``, ``accuracy`` and
    ``plot_history``.

    Args:
        hyperparameter: Mapping providing ``'dnn_hidden_units'``,
            ``'optimizer'`` (a callable that builds an optimizer from the
            model parameters and ``lr``), ``'learning_rate'`` and
            ``'n_steps'``. The whole mapping is also forwarded to ``MLP``.

    Returns:
        The value returned by ``accuracy`` for a 10000-sample batch drawn
        from ``test_data``.
    """

    def to_device(images, labels):
        """Convert a numpy (images, labels) pair to float/long tensors on `device`."""
        return (torch.from_numpy(images).float().to(device),
                torch.from_numpy(labels).long().to(device))

    def predict(images):
        """Run the model on images flattened to one vector per sample."""
        return model(torch.flatten(images, start_dim=1)).squeeze(dim=1)

    model = MLP(3 * 32 * 32, hyperparameter['dnn_hidden_units'], 10,
                hyperparameter).to(device)

    loss_module = nn.CrossEntropyLoss()
    optimizer = hyperparameter['optimizer'](
        model.parameters(), lr=hyperparameter['learning_rate'])

    results = dict(train_scores=list(), val_scores=list())
    for i in range(hyperparameter['n_steps']):
        x, y = to_device(*train_data.next_batch(FLAGS.batch_size))
        preds = predict(x)

        # Evaluate on the last step of every eval_freq-sized window.
        if i % FLAGS.eval_freq == FLAGS.eval_freq - 1:
            # Metrics never need gradients; no_grad keeps the evaluation
            # forward pass from building an autograd graph.
            with torch.no_grad():
                train_acc = accuracy(preds, y)
                results['train_scores'].append(train_acc.cpu())

                x_test, y_test = to_device(*test_data.next_batch(300))
                results['val_scores'].append(
                    accuracy(predict(x_test), y_test).cpu())
            print("current step: ", train_acc)

        # The loss expects class indices; the labels are reduced with a max
        # over dim 1 (presumably they are one-hot encoded -- confirm).
        _, y = torch.max(y, dim=1)
        loss = loss_module(preds, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    plot_history(results)

    # Test
    x, y = to_device(*test_data.next_batch(10000))
    with torch.no_grad():
        test_acc = accuracy(predict(x), y)

    print("Test Accuracy: ", test_acc)
    return test_acc
示例#2
0
def main():
    """
    Main function

    Prints the flag settings, makes sure the data directory exists, builds
    the MLP together with an SGD optimizer and a cross-entropy loss, and
    hands everything to ``train``.
    """
    # Print all Flags to confirm parameter settings
    print_flags()

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(FLAGS.data_dir, exist_ok=True)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    data = cifar10_utils.get_cifar10(FLAGS.data_dir,
                                     one_hot=False,
                                     validation_size=0)

    # The network input size is the number of values in a single image.
    img_shape = data["train"].images[0].shape

    mlp = MLP(np.prod(img_shape), dnn_hidden_units, N_CLASSES)
    print(mlp)

    optimizer = optim.SGD(mlp.parameters(), lr=FLAGS.learning_rate)
    loss_module = nn.CrossEntropyLoss()

    # run the training operation
    train(mlp, data, optimizer, loss_module)
def train():
    """
    Performs training and evaluation of MLP model.

    Trains for at most ``FLAGS.max_steps`` mini-batch steps and evaluates on
    the full test set every ``FLAGS.eval_freq`` steps. Each evaluation is
    logged through a ``SummaryWriter`` and collected for the CSV export.
    Training stops early once the mean training loss over the last 10
    evaluations improved by less than 1e-6 compared with the 10 evaluations
    before them.

    Side effects: writes two CSV files and the model ``state_dict`` below
    ``<cwd>/results`` (the directory is created when missing).

    Returns:
        The test loss of the most recent evaluation, or ``None`` when no
        evaluation ran (``FLAGS.max_steps`` of 0).
    """

    def _as_python_scalar(value):
        """Turn a tensor / NumPy value into a plain Python scalar; pass others through."""
        if isinstance(value, torch.Tensor):
            # Keep the original "first element" semantics of .take(0).
            return value.detach().cpu().numpy().take(0).item()
        if isinstance(value, (np.ndarray, np.generic)):
            # .item() replaces np.asscalar, which newer NumPy no longer has.
            return value.item()
        return value

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print('device', device)

    # flags
    batch_size = FLAGS.batch_size
    optim_name = FLAGS.optimizer
    lr = FLAGS.learning_rate

    # cifar
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
    x_test_np, y_test_np = cifar10['test'].images, cifar10['test'].labels
    (test_images, height, width, colors) = x_test_np.shape
    n_inputs = height * width * colors
    (_, n_classes) = y_test_np.shape

    # The test set is flattened and moved to the device once, up front.
    x_test_flat = x_test_np.reshape((test_images, n_inputs))
    x_test_torch = torch.from_numpy(x_test_flat).to(device)
    y_test_torch = torch.from_numpy(y_test_np).long().to(device)
    idx_test = torch.argmax(y_test_torch, dim=-1).long()

    # model
    ce = torch.nn.CrossEntropyLoss()
    model = MLP(n_inputs, dnn_hidden_units, n_classes)
    model.to(device)

    # optimizer
    optim_pars = {
        'params': model.parameters(),
        'lr': lr,
        'weight_decay': FLAGS.weight_decay,
    }
    optimizer_classes = {
        'adadelta': torch.optim.Adadelta,
        'adagrad': torch.optim.Adagrad,
        'rmsprop': torch.optim.RMSprop,
        'adam': torch.optim.Adam,
    }
    # default is SGD, same as the numpy version
    optimizer_cls = optimizer_classes.get(optim_name, torch.optim.SGD)
    optimizer = optimizer_cls(**optim_pars)

    cols = ['train_acc', 'test_acc', 'train_loss', 'test_loss', 'secs']

    # train
    results = []
    test_loss = None  # stays None when the loop body never runs
    name = f'mlp-pytorch-{optim_name}'
    with SummaryWriter(name) as w:
        for step in tqdm(range(FLAGS.max_steps)):
            optimizer.zero_grad()

            # batch
            x_train_np, y_train_np = cifar10['train'].next_batch(batch_size)
            x_train_flat = x_train_np.reshape((batch_size, n_inputs))
            x_train_torch = torch.from_numpy(x_train_flat).to(device)
            y_train_torch = torch.from_numpy(y_train_np).long().to(device)
            idx_train = torch.argmax(y_train_torch, dim=-1).long()

            # forward pass
            train_predictions = model.forward(x_train_torch)
            train_loss = ce(train_predictions, idx_train)

            # evaluate
            if step % FLAGS.eval_freq == 0:
                eval_index = step // FLAGS.eval_freq
                # Metrics need no gradients; no_grad avoids building an
                # autograd graph over the whole test set.
                with torch.no_grad():
                    train_acc = accuracy(train_predictions, idx_train)
                    start = timer()
                    test_predictions = model.forward(x_test_torch)
                    secs = timer() - start  # time of the test forward pass
                    test_loss = ce(test_predictions, idx_test)
                    test_acc = accuracy(test_predictions, idx_test)
                vals = [train_acc, test_acc, train_loss, test_loss, secs]
                stats = dict(
                    zip(cols, (_as_python_scalar(val) for val in vals)))
                print(test_acc.item())
                w.add_scalars('metrics', stats, eval_index)
                results.append(stats)

                # stop if loss has converged!
                check = 10
                if len(results) >= 2 * check:
                    threshold = 1e-6
                    losses = [item['train_loss'] for item in results]
                    current = np.mean(losses[-check:])
                    prev = np.mean(losses[-2 * check:-check])
                    if (prev - current) < threshold:
                        break

            train_loss.backward()
            optimizer.step()

    df = pd.DataFrame(results, columns=cols)
    meta = {
        'framework': 'pytorch',
        'algo': 'mlp',
        'optimizer': optim_name,
        'batch_size': FLAGS.batch_size,
        'learning_rate': FLAGS.learning_rate,
        'dnn_hidden_units': FLAGS.dnn_hidden_units,
        'weight_decay': FLAGS.weight_decay,
        'max_steps': FLAGS.max_steps,
    }
    for k, v in meta.items():
        df[k] = v

    # Make sure the output directory exists before anything is written.
    results_dir = os.path.join(os.getcwd(), 'results')
    os.makedirs(results_dir, exist_ok=True)

    # One CSV per run ...
    csv_file = os.path.join(
        results_dir,
        f'{name}-batch={FLAGS.batch_size}-lr={FLAGS.learning_rate}-hidden={FLAGS.dnn_hidden_units}-regularization={FLAGS.weight_decay}-steps={FLAGS.max_steps}.csv'
    )
    df.to_csv(csv_file)

    # ... plus a shared CSV that accumulates all runs (header written once).
    csv_file = os.path.join(results_dir, 'results.csv')
    if os.path.isfile(csv_file):
        df.to_csv(csv_file, header=False, mode='a')
    else:
        df.to_csv(csv_file, header=True, mode='w')

    torch_file = os.path.join(results_dir, f'{name}.pth')
    torch.save(model.state_dict(), torch_file)
    print('done!')
    return test_loss
示例#4
0
def train():
    """
    Performs training and evaluation of MLP model.

    Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.

    Trains for ``FLAGS.max_steps`` mini-batch steps with the optimizer named
    by ``FLAGS.optimizer`` ('SGD' or 'AdamW'), evaluates after the first step
    and after every ``FLAGS.eval_freq``-th step, then plots the per-step
    training loss and the evaluation accuracy.

    Raises:
        ValueError: if ``FLAGS.optimizer`` is neither 'SGD' nor 'AdamW'.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
        torch.cuda.manual_seed_all(42)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    data = cifar10_utils.get_cifar10(data_dir=FLAGS.data_dir)
    train_set = data['train']
    print(train_set.images.shape)
    test_set = data['test']
    n_inputs = train_set.images[0].flatten().shape[0]
    n_classes = train_set.labels[0].shape[0]

    mlp = MLP(n_inputs, dnn_hidden_units, n_classes)
    loss_mod = nn.CrossEntropyLoss()
    if FLAGS.optimizer == 'SGD':
        optimizer = torch.optim.SGD(mlp.parameters(), lr=FLAGS.learning_rate)
    elif FLAGS.optimizer == 'AdamW':
        optimizer = torch.optim.AdamW(mlp.parameters(), lr=FLAGS.learning_rate)
    else:
        # Fail early with a clear message instead of a NameError later on.
        raise ValueError(
            "Unsupported optimizer %r; expected 'SGD' or 'AdamW'" %
            (FLAGS.optimizer,))

    mlp.to(device)

    # The test tensors never change, so they are built once instead of on
    # every evaluation.
    x_test = torch.from_numpy(
        test_set.images.reshape(test_set.images.shape[0], n_inputs)).to(device)
    y_test = torch.from_numpy(test_set.labels).to(device)

    loss_history = []
    acc_history = []
    eval_steps = []  # x positions for the accuracy curve
    for step in range(FLAGS.max_steps):
        mlp.train()
        x, y = train_set.next_batch(FLAGS.batch_size)
        x = torch.from_numpy(x.reshape(x.shape[0], n_inputs)).to(device)
        y = torch.from_numpy(np.argmax(y, axis=1)).to(device)  # converts onehot to dense

        out = mlp(x)
        loss = loss_mod(out, y)
        # Store a plain float: keeping the tensor would keep its autograd
        # graph alive and cannot be plotted directly.
        loss_history.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step == 0 or (step + 1) % FLAGS.eval_freq == 0:
            mlp.eval()
            with torch.no_grad():
                acc = accuracy(mlp.forward(x_test), y_test)
            print('Accuracy:', acc)
            acc_history.append(acc)
            # Recording the position here keeps x and y the same length for
            # every max_steps / eval_freq combination.
            eval_steps.append(0 if step == 0 else step + 1)

    print('Final loss:', loss_history[-1])
    print('Final acc:', acc_history[-1])

    plt.plot(loss_history)
    plt.step(eval_steps, acc_history)
    plt.legend(['loss', 'accuracy'])
    plt.show()
def train(n_hidden_1, dropout, lr, wdecay, _run):
    """
    Performs training and evaluation of MLP model.

    Implement training and evaluation of MLP model. Evaluate your model on the whole test set each eval_freq iterations.

    Args:
        n_hidden_1: Size of the first hidden layer; the second one is fixed
            at 400 units.
        dropout: Forwarded to ``MLP`` as its ``dropout`` argument.
        lr: Learning rate for Adam.
        wdecay: Weight decay for Adam.
        _run: Object exposing ``log_scalar(name, value, step)``; receives the
            train and test metrics.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def get_xy_tensors(batch):
        """Convert an (images, labels) numpy batch into flat float / long tensors."""
        x, y = batch
        x = torch.tensor(x.reshape(-1, 3072), dtype=torch.float32).to(device)
        y = torch.tensor(y, dtype=torch.long).to(device)
        return x, y

    datasets = cifar10_utils.read_data_sets(DATA_DIR_DEFAULT, one_hot=False)
    train_data = datasets['train']
    test_data = datasets['test']
    model = MLP(n_inputs=3072,
                n_hidden=[n_hidden_1, 400],
                n_classes=10,
                dropout=dropout).to(device)
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wdecay)

    log_every = 50
    # Running sums since the last log line. Dividing by the number of steps
    # actually accumulated keeps the very first report (a single step) from
    # being scaled down by log_every.
    loss_sum = 0.0
    acc_sum = 0.0
    n_accumulated = 0
    for step in range(FLAGS.max_steps):
        x, y = get_xy_tensors(train_data.next_batch(FLAGS.batch_size))

        # Forward and backward passes
        optimizer.zero_grad()
        out = model.forward(x)
        loss = loss_fn(out, y)
        loss.backward()

        # Parameter updates
        optimizer.step()

        loss_sum += loss.item()
        acc_sum += accuracy(out, y)
        n_accumulated += 1
        if step % log_every == 0:
            avg_loss = loss_sum / n_accumulated
            avg_acc = acc_sum / n_accumulated
            print('[{}/{}] train loss: {:.6f}  train acc: {:.6f}'.format(
                step, FLAGS.max_steps, avg_loss, avg_acc))
            _run.log_scalar('train-loss', avg_loss, step)
            _run.log_scalar('train-acc', avg_acc, step)
            loss_sum = 0.0
            acc_sum = 0.0
            n_accumulated = 0

        # Evaluate
        if step % FLAGS.eval_freq == 0 or step == (FLAGS.max_steps - 1):
            x_test, y_test = get_xy_tensors(
                test_data.next_batch(test_data.num_examples))
            model.eval()
            # No gradients are needed here; no_grad avoids building an
            # autograd graph over the whole test set.
            with torch.no_grad():
                test_out = model.forward(x_test)
                test_loss = loss_fn(test_out, y_test).item()
                test_acc = accuracy(test_out, y_test)
            model.train()
            print('[{}/{}]  test accuracy: {:6f}'.format(
                step, FLAGS.max_steps, test_acc))

            _run.log_scalar('test-loss', test_loss, step)
            _run.log_scalar('test-acc', test_acc, step)
示例#6
0
def train():
    """
    Performs training and evaluation of MLP model.

    Trains for ``FLAGS.max_steps`` mini-batch steps with Adam, evaluates on
    the test set (in batches of ``FLAGS.batch_size``) every
    ``FLAGS.eval_freq`` steps and after the last step, logs to TensorBoard
    and pickles the collected metrics to ``results/<run_id>_results.pkl``.

    Besides ``FLAGS`` this reads ``batchnorm``, ``dropout``, ``weight_decay``,
    ``dtype`` and ``device`` from the enclosing scope.
    """

    # DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # initialize tensorboard
    run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S_mlp")
    if batchnorm:
        run_id = run_id + '_batchnorm'
    log_dir = 'tensorboard/' + run_id
    writer = SummaryWriter(log_dir=log_dir)

    # try/finally closes the writer even when training raises.
    try:
        # get the dataset
        data_set = cifar10_utils.get_cifar10(FLAGS.data_dir)

        # number of full test batches visited per evaluation
        n_test_batches = int(data_set['test']._num_examples /
                             FLAGS.batch_size)
        image_shape = data_set['train'].images[0].shape
        n_inputs = image_shape[0] * image_shape[1] * image_shape[2]
        n_classes = data_set['train'].labels[0].shape[0]

        # get the necessary components
        classifier = MLP(n_inputs, dnn_hidden_units, n_classes, dropout,
                         batchnorm).to(device)
        loss_function = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(classifier.parameters(),
                                     lr=FLAGS.learning_rate,
                                     weight_decay=weight_decay)

        # list of training accuracies and losses
        train_accuracies = []
        train_losses = []

        # list of test accuracies and losses
        test_accuracies = []
        test_losses = []

        # Most recent evaluation results; written to TensorBoard every step.
        epoch_test_accuracy = 0
        epoch_test_loss = 0

        # training loop
        for step in range(FLAGS.max_steps):

            # get current batch...
            images, labels = data_set['train'].next_batch(FLAGS.batch_size)
            images = images.reshape(FLAGS.batch_size, n_inputs)

            # ...in the gpu
            images = torch.from_numpy(images).type(dtype).to(device=device)
            labels = torch.from_numpy(labels).type(dtype).to(device=device)

            # forward pass
            classifier.train()
            predictions = classifier.forward(images)

            # compute loss (the loss takes class indices)
            class_labels = labels.argmax(dim=1)
            loss = loss_function(predictions, class_labels)

            # reset gradients, backward pass, update weights
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # get accuracy and loss for the batch
            train_accuracy = accuracy(predictions, labels)
            train_accuracies.append(train_accuracy)
            writer.add_scalar("Training accuracy vs steps", train_accuracy,
                              step)

            train_losses.append(loss.item())
            writer.add_scalar("Training loss vs steps", loss.item(), step)

            if ((step + 1) % 100) == 0 or step == 0:
                print("\nStep", step + 1)
                print("\tTRAIN:", round(train_accuracy * 100, 1), "%")

            # run evaluation every eval_freq steps and after the final step
            if (step + 1) % FLAGS.eval_freq == 0 or (step +
                                                     1) == FLAGS.max_steps:

                # list of test batch accuracies and losses for this step
                step_test_accuracies = []
                step_test_losses = []

                # get accuracy on the test set
                classifier.eval()
                # Evaluation needs no gradients; no_grad avoids building an
                # autograd graph for every test batch.
                with torch.no_grad():
                    for _ in range(n_test_batches):
                        # get current batch...
                        images, labels = data_set['test'].next_batch(
                            FLAGS.batch_size)
                        images = images.reshape(FLAGS.batch_size, n_inputs)

                        # ...in the gpu
                        images = torch.from_numpy(images).type(dtype).to(
                            device=device)
                        labels = torch.from_numpy(labels).type(dtype).to(
                            device=device)

                        # forward pass
                        predictions = classifier(images)

                        # compute loss
                        class_labels = labels.argmax(dim=1)
                        loss = loss_function(predictions, class_labels)

                        # get accuracy and loss for the batch
                        step_test_accuracies.append(
                            accuracy(predictions, labels))
                        step_test_losses.append(loss.item())

                # store accuracy and loss
                epoch_test_accuracy = np.mean(step_test_accuracies)
                test_accuracies.append(epoch_test_accuracy)

                epoch_test_loss = np.mean(step_test_losses)
                test_losses.append(epoch_test_loss)

                print("\tTEST:", round(epoch_test_accuracy * 100, 1), "%")

            # Written on every step, so between evaluations the curve repeats
            # the latest evaluation result (0 before the first evaluation).
            writer.add_scalar("Test accuracy vs epochs", epoch_test_accuracy,
                              step)
            writer.add_scalar("Test loss vs epochs", epoch_test_loss, step)

        print("\nBest TEST:", round(max(test_accuracies) * 100, 1), "%")

        # save results
        results = {
            'train_accuracies': train_accuracies,
            'train_losses': train_losses,
            'test_accuracies': test_accuracies,
            'test_losses': test_losses,
            'eval_freq': FLAGS.eval_freq
        }

        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs("results/", exist_ok=True)
        with open("results/" + run_id + "_results.pkl", "wb") as file:
            pkl.dump(results, file)
    finally:
        writer.close()
def train():
  """
  Performs training and evaluation of MLP model.

  Runs a grid search over learning rate and weight decay with Adam. Every
  grid point trains a fresh MLP for ten times ``num_examples / batch_size``
  steps, evaluates on the whole test set each ``FLAGS.eval_freq`` steps and
  saves an accuracy figure and a loss figure below ``figs/``. The best test
  accuracy of every grid point is collected, printed and saved with
  ``np.save``.
  """

  ### DO NOT CHANGE SEEDS!
  # Set the random seeds for reproducibility
  np.random.seed(42)

  ## Prepare all functions
  # Get number of units in each hidden layer specified in the string such as 100,100
  if FLAGS.dnn_hidden_units:
    dnn_hidden_units = [int(units) for units in FLAGS.dnn_hidden_units.split(",")]
  else:
    dnn_hidden_units = []

  # Get negative slope parameter for LeakyReLU
  neg_slope = FLAGS.neg_slope

  acc_param_search = []

  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
  cifar10_set = cifar10_utils.get_cifar10(FLAGS.data_dir)
  n_train = cifar10_set['train'].num_examples

  # One batch is drawn up front only to read the input/output dimensions.
  x, y = cifar10_set['train'].next_batch(FLAGS.batch_size)
  print("The size of the dataset is: " + str(n_train))
  x = x.reshape(FLAGS.batch_size, -1)

  out_dim = y.shape[1]
  in_dim = x.shape[1]

  hu = 4
  lr_list = [1e-2, 1.5e-3, 1.25e-3, 1e-3 , 1e-4]
  wd_list = [1e-4, 5e-4, 1e-5, 5e-5]

  # The first hidden layer is forced to 600 units (also when the flag left
  # the list empty), followed by `hu` progressively narrower layers.
  if dnn_hidden_units:
    dnn_hidden_units[0] = 600
  else:
    dnn_hidden_units.append(600)
  for i in range(hu):
    dnn_hidden_units.append(int(500 - (450 * (i / hu))))

  # The test set does not change, so it is converted to a tensor only once.
  test_images = cifar10_set['test'].images
  test_labels = cifar10_set['test'].labels
  x_test = torch.tensor(test_images.reshape(test_images.shape[0], -1), dtype=torch.float32).to(device)

  # Number of training samples seen between two evaluations (x-axis spacing).
  samples_per_eval = FLAGS.eval_freq * FLAGS.batch_size

  for lr in lr_list:
    for wd in wd_list:
      loss_train = []
      acc_train = []
      acc_test = []
      print('Testing Parameters layers ' + str((hu * 2) + 3) + '_learning_rate_' + str(
        lr) + '_weightdecay_' + str(wd))

      mlp = MLP(in_dim, dnn_hidden_units, out_dim, neg_slope).to(device)
      print("Opt is Adam")
      optimizer = torch.optim.Adam(mlp.parameters(), lr=lr, weight_decay=wd)
      loss_funct = nn.CrossEntropyLoss()

      # Optional L1 penalty on the linear layers (switched off).
      reg_on = False
      reg_const = 0.00001
      # ten passes over the training set
      steps = int((n_train / FLAGS.batch_size) * 10)
      print(steps)

      for i in range(0, steps + 1):
        x, t = cifar10_set['train'].next_batch(FLAGS.batch_size)
        x = torch.tensor(x.reshape(FLAGS.batch_size, -1), dtype=torch.float32).to(device)
        y = mlp.forward(x)
        loss = loss_funct(y, torch.LongTensor(np.argmax(t, 1)).to(device))
        if reg_on:
          for mod in mlp.modls:
            if isinstance(mod, nn.Linear):
              # Add the penalty once; `loss += loss + ...` doubled the loss.
              loss = loss + (torch.sum(torch.abs(mod.weight)) * reg_const)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i % FLAGS.eval_freq == 0:
          # Store a float; the tensor would keep its autograd graph alive
          # and cannot be plotted directly.
          loss_train.append(loss.item())
          acc_train.append(accuracy(y.cpu().detach().numpy(), t))
          # Evaluation needs no gradients.
          with torch.no_grad():
            y_test = mlp.forward(x_test)
          acc_test.append(accuracy(y_test.cpu().numpy(), test_labels))
      max_acc = np.array(acc_test).max()
      print('The max found for these settings: layers ' + str((hu*2)+3) + '_learning_rate_' + str(lr) +'_weightdecay_' + str(wd) + 'was :' +str(max_acc))
      acc_param_search.append(max_acc)

      # Plotting the accuracy of test and train:
      plt.figure(0)
      plt.clf()  # start clean so curves of earlier grid points do not pile up
      acc_epochs = np.arange(0, len(acc_train) * samples_per_eval, samples_per_eval) / n_train
      plt.plot(acc_epochs, acc_train, label='Train')
      plt.plot(acc_epochs, acc_test, label='Test')
      plt.xlabel('Epoch')
      plt.ylabel('Accuracy')
      plt.title('Accuracy of Train and Test Set Through Training')
      plt.legend()
      # Own file name for the accuracy figure; it used to share the loss
      # figure's path and was overwritten by it.
      acc_loc = 'figs/acc_adam_' + str((hu*2)+3) + '_learning_rate_' + str(lr) +'_weightdecay_' + str(wd) +'.png'
      plt.savefig(acc_loc)

      plt.figure(1)
      plt.clf()
      loss_epochs = np.arange(0, len(loss_train) * samples_per_eval, samples_per_eval) / n_train
      plt.plot(loss_epochs, loss_train, label='Train')
      plt.xlabel('Epoch')
      plt.ylabel('Loss')
      plt.title('Loss Through Training')
      loss_loc = 'figs/loss_adam_' + str((hu * 2) + 3) + '_learning_rate_' + str(lr) + '_weightdecay_' + str(wd) + '.png'
      plt.savefig(loss_loc)
      ########################
      # END OF YOUR CODE    #
      #######################
  print(acc_param_search)
  # NOTE(review): `acc_grid_srch_4` is not defined in this function; it must
  # exist at module level (or was meant to be a string) -- confirm.
  np.save(acc_grid_srch_4, acc_param_search)
def train():
    """
    Performs training and evaluation of MLP model.

    Trains for ``FLAGS.max_steps`` steps with the optimizer named by
    ``FLAGS.optimizer`` ("Adam", "SGD" or "RMSprop"), evaluates on the test
    set after the first step and every ``FLAGS.eval_freq`` steps, and appends
    one summary line for the run to ``results.csv``.

    Raises:
        ValueError: if ``FLAGS.optimizer`` is not one of the supported names.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    data = cifar10_utils.get_cifar10(FLAGS.data_dir)
    n_inputs = 3 * 32 * 32
    n_classes = 10
    # Number of full batches in the test set; one evaluation visits them all.
    batches_per_epoch = data['test'].images.shape[0] // FLAGS.batch_size
    model = MLP(n_inputs, dnn_hidden_units, n_classes).to(device)
    loss_fn = nn.CrossEntropyLoss()

    if FLAGS.optimizer == "Adam":
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=FLAGS.learning_rate,
                                     weight_decay=FLAGS.weight_decay)
    elif FLAGS.optimizer == "SGD":
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=FLAGS.learning_rate,
                                    weight_decay=FLAGS.weight_decay,
                                    momentum=FLAGS.momentum)
    elif FLAGS.optimizer == "RMSprop":
        optimizer = torch.optim.RMSprop(model.parameters(),
                                        lr=FLAGS.learning_rate,
                                        weight_decay=FLAGS.weight_decay,
                                        momentum=FLAGS.momentum)
    else:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(
            "Unsupported optimizer %r; expected 'Adam', 'SGD' or 'RMSprop'" %
            (FLAGS.optimizer,))

    max_accuracy = 0.0
    start_time = time.perf_counter()
    for step in range(1, FLAGS.max_steps + 1):
        x, y = get_batch(data, 'train', FLAGS.batch_size, device)
        predictions = model.forward(x)
        training_loss = loss_fn(predictions, y.argmax(dim=1))
        optimizer.zero_grad()
        training_loss.backward()
        optimizer.step()

        if step == 1 or step % FLAGS.eval_freq == 0:
            with torch.no_grad():
                # Averages over all test batches, accumulated incrementally.
                test_loss = 0.0
                test_acc = 0
                for _ in range(batches_per_epoch):
                    x, y = get_batch(data, 'test', FLAGS.batch_size, device)
                    predictions = model(x)
                    batch_loss = loss_fn(predictions, y.argmax(dim=1))
                    test_loss += batch_loss.item() / batches_per_epoch
                    test_acc += accuracy(predictions, y) / batches_per_epoch
            if test_acc > max_accuracy:
                max_accuracy = test_acc
            print(
                "step %d/%d: training loss: %.3f test loss: %.3f accuracy: %.1f%%"
                % (step, FLAGS.max_steps, training_loss.item(), test_loss,
                   test_acc * 100))

    time_taken = time.perf_counter() - start_time
    # The context manager closes the file even if formatting/writing fails.
    with open("results.csv", "a+") as results_file:
        results_file.write(
            "%s;%s;%f;%f;%f;%d;%d;%d;%f;%.3f\n" %
            (FLAGS.dnn_hidden_units, FLAGS.optimizer, FLAGS.learning_rate,
             FLAGS.momentum, FLAGS.weight_decay, FLAGS.batch_size,
             FLAGS.max_steps, FLAGS.eval_freq, max_accuracy, time_taken))
    print("Done. Scored %.1f%% in %.1f seconds." %
          (max_accuracy * 100, time_taken))
示例#9
0
def train():
  """
  Performs training and evaluation of MLP model.

  Trains for ``FLAGS.max_steps`` mini-batch steps with the optimizer named by
  ``FLAGS.optimizer`` (plain SGD for any unrecognised name). Every
  ``FLAGS.eval_freq`` steps it makes one pass over the test set in batches of
  ``FLAGS.batch_size`` and records accuracy and loss. When ``FLAGS.plot`` is
  set, the curves are saved to ``pytorch.png``.
  """

  ### DO NOT CHANGE SEEDS!
  # Set the random seeds for reproducibility
  np.random.seed(42)
  torch.manual_seed(42)

  ## Prepare all functions
  # Get number of units in each hidden layer specified in the string such as 100,100
  if FLAGS.dnn_hidden_units:
    dnn_hidden_units = [int(units) for units in FLAGS.dnn_hidden_units.split(",")]
  else:
    dnn_hidden_units = []

  # Get negative slope parameter for LeakyReLU
  neg_slope = FLAGS.neg_slope
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

  cifar10 = cifar10_utils.get_cifar10(data_dir=FLAGS.data_dir)
  train_data = cifar10['train']

  # Each image is flattened to a single vector, so the input size is the
  # number of values per image.
  n_inputs = train_data.images.reshape(train_data.images.shape[0], -1).shape[1]
  n_hidden = dnn_hidden_units
  n_classes = train_data.labels.shape[1]

  # One evaluation is a single pass over the test set (at least one batch),
  # not FLAGS.max_steps batches as the loop bound used to be.
  n_test_batches = max(1, cifar10['test'].images.shape[0] // FLAGS.batch_size)

  model = MLP(n_inputs, n_hidden, n_classes, neg_slope)
  model.to(device)

  params = model.parameters()

  optimizer_classes = {
    'Adam': torch.optim.Adam,
    'Adamax': torch.optim.Adamax,
    'Adagrad': torch.optim.Adagrad,
    'Adadelta': torch.optim.Adadelta,
    'SparseAdam': torch.optim.SparseAdam,
  }
  # Any other name falls back to plain SGD.
  optimizer_cls = optimizer_classes.get(FLAGS.optimizer, torch.optim.SGD)
  optimizer = optimizer_cls(params, lr=FLAGS.learning_rate)

  criterion = torch.nn.CrossEntropyLoss()
  train_acc_plot = []
  test_acc_plot = []
  loss_train = []
  loss_test = []
  rloss = 0  # running sum of the training loss over all steps so far
  best_accuracy = 0

  for i in range(0, FLAGS.max_steps):
    x, y = cifar10['train'].next_batch(FLAGS.batch_size)
    x, y = torch.from_numpy(x).float().to(device), torch.from_numpy(y).float().to(device)
    x = x.reshape(x.shape[0], -1)

    out = model.forward(x)
    # The loss takes class indices, hence the argmax over the label vectors.
    loss = criterion.forward(out, y.argmax(1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    rloss += loss.item()

    if i % FLAGS.eval_freq == 0:
      with torch.no_grad():
        train_accuracy = accuracy(out, y)
        test_accuracys, test_losses = [], []
        for _ in range(n_test_batches):
          test_x, test_y = cifar10['test'].next_batch(FLAGS.batch_size)
          test_x, test_y = torch.from_numpy(test_x).float().to(device), torch.from_numpy(test_y).float().to(device)
          test_x = test_x.reshape(test_x.shape[0], -1)

          test_out = model.forward(test_x)
          test_loss = criterion(test_out, test_y.argmax(1))
          # .item() gives a Python float on every device, so no
          # device-specific branch is needed.
          test_losses.append(test_loss.item())
          test_accuracys.append(accuracy(test_out, test_y))

      t_acc = np.array(test_accuracys).mean()
      t_loss = np.array(test_losses).mean()
      train_acc_plot.append(train_accuracy)
      test_acc_plot.append(t_acc)
      loss_train.append(rloss/(i + 1))
      loss_test.append(t_loss)
      print(f"iter {i}, train_loss_avg {rloss/(i + 1)}, test_loss_avg {t_loss}, train_acc {train_accuracy}, test_acc_avg {t_acc}")
      if t_acc > best_accuracy:
        best_accuracy = t_acc

  print(f"Best Accuracy {best_accuracy}",flush=True)
  if FLAGS.plot:
    print('Start plotting...')
    fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)
    ax1.plot(np.arange(len(train_acc_plot)), train_acc_plot, label='training')
    ax1.plot(np.arange(len(test_acc_plot)), test_acc_plot, label='testing')
    ax1.set_title('Training evaluation batch size '+str(FLAGS.batch_size)+' learning rate '+str(FLAGS.learning_rate)+ '\n best accuracy '+str(best_accuracy) )
    ax1.set_ylabel('Accuracy')
    ax1.legend()
    ax2.plot(np.arange(len(loss_train)), loss_train, label='Train Loss')
    ax2.plot(np.arange(len(loss_test)), loss_test, label='Test Loss')
    ax2.set_title('Loss evaluation')
    ax2.set_ylabel('Loss')
    ax2.legend()
    plt.xlabel('Iteration')
    plt.savefig('pytorch.png')
示例#10
0
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP on CIFAR-10 mini-batches for FLAGS.max_steps iterations and
    evaluates it on the whole test set every FLAGS.eval_freq iterations. The
    collected accuracy/loss histories are passed to plot_results and the best
    test accuracy (with the iteration it occurred at) is printed.

    Relies on names defined elsewhere in the module: FLAGS, OPTIMIZER,
    LR_FREQ, weight_decay, MLP, accuracy, cifar10_utils and plot_results.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [int(units) for units in FLAGS.dnn_hidden_units.split(",")]
    else:
        dnn_hidden_units = []

    print(torch.cuda.is_available())
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # histories handed to plot_results at the end
    train_accuracies = []
    train_losses = []
    test_accuracies = []
    test_losses = []

    # load data from directory specified in the input
    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)

    # load test images and labels
    test_images = cifar10['test'].images
    test_targets = cifar10['test'].labels

    # flatten every test image into one vector: n_inputs = product of the
    # three trailing image dimensions
    n_test = test_images.shape[0]
    n_inputs = test_images.shape[1] * test_images.shape[2] * test_images.shape[3]
    test_images = test_images.reshape((n_test, n_inputs))
    n_classes = 10

    # the model is never trained on the test images, so no gradients are needed
    test_images = torch.tensor(test_images, requires_grad=False).to(device)
    test_targets = torch.tensor(test_targets, requires_grad=False).to(device)

    # initialize MLP model; it must live on the same device as the data,
    # otherwise the forward pass fails as soon as CUDA is selected
    MLP_model = MLP(n_inputs=n_inputs, n_hidden=dnn_hidden_units, n_classes=n_classes, neg_slope=FLAGS.neg_slope)
    MLP_model.to(device)
    print(MLP_model)
    loss_module = nn.CrossEntropyLoss()

    learning_rate = FLAGS.learning_rate

    if OPTIMIZER == "SGD":
        optimizer = torch.optim.SGD(MLP_model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    else:
        optimizer = torch.optim.Adam(MLP_model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    batch_size = FLAGS.batch_size
    # best test accuracy seen so far and the iteration it was reached at
    max_acc = 0
    max_iter = 0

    for iteration in range(FLAGS.max_steps):

        train_images, train_targets = cifar10['train'].next_batch(batch_size)
        # input to MLP.forward is (batch_size, n_inputs)
        train_images = train_images.reshape((batch_size, n_inputs))

        # switch from numpy version to tensor and to device
        train_images = torch.tensor(train_images).type(torch.FloatTensor).to(device)
        train_targets = torch.tensor(train_targets).type(torch.LongTensor).to(device)

        # Step decay of the learning rate (note: also fires at iteration 0).
        # The rate is updated in place so the optimizer chosen above keeps
        # its type and state instead of being replaced by a fresh SGD.
        if iteration % LR_FREQ == 0:
            learning_rate = learning_rate * 0.8
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

        optimizer.zero_grad()

        # forward pass, loss on the class indices of the one-hot targets
        train_predictions = MLP_model.forward(train_images)
        loss = loss_module(train_predictions, train_targets.argmax(dim=1))

        loss.backward()
        optimizer.step()

        train_accuracies.append(accuracy(train_predictions.detach(), train_targets))
        # store a plain float so the autograd graph of every step can be freed
        train_losses.append(loss.item())

        if iteration % 100 == 0:
            print("iteration:" + str(iteration) + "train_acc:" + str(np.mean(train_accuracies)))

        # evaluate the current model on the whole test set
        if iteration % FLAGS.eval_freq == 0:
            # no graph is needed for evaluation; this saves the activations
            # of the full test set from being kept in memory
            with torch.no_grad():
                test_predictions = MLP_model.forward(test_images)
                test_loss = loss_module(test_predictions, test_targets.argmax(dim=1))
                test_acc = accuracy(test_predictions, test_targets)
            test_accuracies.append(test_acc)
            print("iteration:" + str(iteration) + "test_acc:" + str(test_accuracies[-1]))
            test_losses.append(test_loss.item())
            if (max_acc < test_acc):
                max_acc = test_acc
                max_iter = iteration

    print('Training is done')
    print('Save results in folder: .')

    print('Training is done')
    print('Plot Results')

    plot_results(train_accuracies, test_accuracies, train_losses, test_losses)
    print("max accuracy: " + str(max_acc) + " at iteration: " + str(max_iter))
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP with plain SGD on CIFAR-10 mini-batches for FLAGS.max_steps
    steps, evaluates on the whole test set every FLAGS.eval_freq steps, prints
    the final test accuracy and saves accuracy/loss curves to
    "mlp-pytorch-results".

    Relies on names defined elsewhere in the module: FLAGS, MLP, accuracy and
    cifar10_utils.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []
    print("arch: ", dnn_hidden_units)

    # fall back to the CPU instead of crashing when no CUDA device exists
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    dataset = cifar10_utils.get_cifar10()
    training = dataset['train']
    test = dataset['test']

    # the flattened test set is moved to the device once and reused
    test_images = torch.tensor(
        test.images.reshape(test.images.shape[0], -1)).to(device)
    test_labels = torch.tensor(test.labels).to(device)

    model = MLP(n_inputs=32 * 32 * 3, n_hidden=dnn_hidden_units,
                n_classes=10).to(device)

    opt = torch.optim.SGD(model.parameters(), lr=FLAGS.learning_rate)
    ce = nn.CrossEntropyLoss()

    test_accuracy = []
    train_accuracy = []
    loss_list = []
    # steps at which the accuracies were recorded (x-axis of the plot)
    eval_steps = []

    for epoch in range(FLAGS.max_steps):

        x, y = training.next_batch(FLAGS.batch_size)
        x = torch.tensor(x).to(device)
        y = torch.tensor(y).to(device)

        opt.zero_grad()
        out = model.forward(x.reshape(x.shape[0], -1))
        # targets are one-hot; CrossEntropyLoss wants class indices
        loss = ce(out, y.max(1)[1])
        loss_list.append(float(loss))
        loss.backward()
        opt.step()

        if not epoch % FLAGS.eval_freq:
            eval_steps.append(epoch)
            train_accuracy.append(accuracy(out, y))
            # evaluation needs no autograd graph
            with torch.no_grad():
                test_out = model.forward(test_images)
            test_accuracy.append(accuracy(test_out, test_labels))
            print('Epoch: ', epoch, 'Loss: ', loss, 'Accuracy: ',
                  train_accuracy[-1], 'Test ac.:', test_accuracy[-1])

    with torch.no_grad():
        out = model.forward(test_images)
    print('Test accuracy: ', accuracy(out, test_labels))

    import seaborn as sns
    import matplotlib.pyplot as plt
    f, axes = plt.subplots(1, 2)
    # x-axes follow the steps actually run, so the plot stays consistent when
    # max_steps / eval_freq differ from their defaults; x/y are passed by
    # keyword because recent seaborn releases reject positional data arguments
    ax = sns.lineplot(x=eval_steps, y=train_accuracy, ax=axes[0])
    ax = sns.lineplot(x=eval_steps, y=test_accuracy, ax=axes[0])
    ax.set_title('Training and test accuracy')
    ax.legend(['training', 'test'])
    ax = sns.lineplot(x=np.arange(len(loss_list)), y=loss_list, ax=axes[1])
    ax.set_title('Loss')
    figure = ax.get_figure()
    figure.savefig("mlp-pytorch-results")
示例#12
0
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP on CIFAR-10 mini-batches for FLAGS.max_steps steps with the
    optimizer named by FLAGS.optimizer ('sgd' or 'adam'), evaluates on the
    whole test set every FLAGS.eval_freq steps and finally prints the recorded
    accuracy and loss histories. All tensors stay on the CPU.

    Relies on names defined elsewhere in the module: FLAGS, DATA_DIR_DEFAULT,
    MLP, accuracy, cifar10_utils and optim.

    Raises:
        ValueError: if FLAGS.optimizer is neither 'sgd' nor 'adam'.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # the test data also provides the input/output sizes of the model
    cifar10 = cifar10_utils.get_cifar10(DATA_DIR_DEFAULT)

    x_test, y_test = cifar10['test'].images, cifar10['test'].labels

    input_size = np.shape(x_test)[1] * np.shape(x_test)[2] * np.shape(
        x_test)[3]
    class_size = np.shape(y_test)[1]

    x_test = torch.from_numpy(x_test.reshape([np.shape(x_test)[0],
                                              input_size]))
    y_test = torch.from_numpy(y_test)

    net = MLP(n_inputs=input_size,
              n_hidden=dnn_hidden_units,
              n_classes=class_size)

    criterion = torch.nn.CrossEntropyLoss()

    eval_accuracies = []
    train_accuracies = []

    eval_loss = []
    train_loss = []

    # choose between optimizers; fail early and clearly on an unknown name
    # instead of hitting an unbound `optimizer` inside the training loop
    if FLAGS.optimizer == 'sgd':
        optimizer = optim.SGD(net.parameters(),
                              lr=FLAGS.learning_rate,
                              momentum=FLAGS.momentum,
                              weight_decay=FLAGS.regularizer)
    elif FLAGS.optimizer == 'adam':
        optimizer = optim.Adam(net.parameters(),
                               lr=FLAGS.learning_rate,
                               weight_decay=FLAGS.regularizer)
    else:
        raise ValueError(
            "Unsupported optimizer {!r}; expected 'sgd' or 'adam'".format(
                FLAGS.optimizer))

    for step in range(FLAGS.max_steps):
        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        x = torch.from_numpy(x.reshape([np.shape(x)[0], input_size]))
        y = torch.from_numpy(y)
        optimizer.zero_grad()

        out = net.forward(x)

        # targets are one-hot; CrossEntropyLoss wants class indices
        loss = criterion(out, y.argmax(dim=1))
        loss.backward()
        optimizer.step()

        if step % FLAGS.eval_freq == 0:
            # evaluation on the full test set needs no autograd graph
            with torch.no_grad():
                test_out = net.forward(x_test)
                eval_loss.append(
                    criterion(test_out, y_test.argmax(dim=1)).item())
            eval_accuracies.append(accuracy(test_out, y_test))
            train_accuracies.append(accuracy(out, y))
            # `loss` already is the criterion on (out, y); no need to recompute
            train_loss.append(loss.item())

    print("EVAL ACCURACY")
    print(eval_accuracies)
    print("train ACCURACY")
    print(train_accuracies)
    print("EVAL loss")
    print(eval_loss)
    print("train loss")
    print(train_loss)
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP with plain SGD on CIFAR-10 mini-batches for FLAGS.max_steps
    steps, printing the accuracy and loss of the current training batch every
    FLAGS.eval_freq steps, and finally prints the accuracy on the whole test
    set. Everything runs on the CPU.

    Relies on names defined elsewhere in the module: FLAGS, MLP, accuracy and
    cifar10_utils.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # The first batch also determines the input size of the model. It uses
    # the configured batch size, like every later batch, rather than the
    # module default.
    cifar10 = cifar10_utils.get_cifar10('cifar10/cifar-10-batches-py')
    x, y = cifar10['train'].next_batch(FLAGS.batch_size)
    print(y.shape)
    print(x.shape)
    x = x.reshape(np.size(x, 0), -1)

    n_input = np.size(x, 1)

    # create model
    net = MLP(n_input, dnn_hidden_units, 10)

    # get loss function and optimizer
    crossEntropy = nn.CrossEntropyLoss()

    optimizer = torch.optim.SGD(net.parameters(), lr=FLAGS.learning_rate)

    for i in range(FLAGS.max_steps):

        # gradients are only needed for the parameters, not for the inputs
        out = net(torch.from_numpy(x))

        # labels are one-hot; CrossEntropyLoss wants class indices
        label_index = torch.from_numpy(np.argmax(y, axis=1)).long()

        loss = crossEntropy(out, label_index)

        if i % FLAGS.eval_freq == 0:
            # the numpy copy of the logits is only needed when reporting
            print(accuracy(out.detach().numpy(), y))
            print(loss)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # fetch the batch for the next step
        x, y = cifar10['train'].next_batch(FLAGS.batch_size)
        x = x.reshape(np.size(x, 0), -1)

    # test on the whole test set; no autograd graph is needed here
    x, y = cifar10['test'].images, cifar10['test'].labels
    x = x.reshape(np.size(x, 0), -1)

    with torch.no_grad():
        out = net(torch.from_numpy(x))
    out_numpy = out.numpy()
    print("The accuracy on the test set is:")
    print(accuracy(out_numpy, y))
示例#14
0
def train():
  """
  Performs training and evaluation of MLP model.

  Runs FLAGS.max_steps SGD steps (weight decay 0.001) on CIFAR-10
  mini-batches, measures accuracy on the whole test set every
  FLAGS.eval_freq steps, and appends the collected accuracy and loss lists
  to 'accuracy_torch.txt' and 'loss_torch.txt'.
  """

  ### DO NOT CHANGE SEEDS!
  # Set the random seeds for reproducibility
  np.random.seed(42)

  ## Prepare all functions
  # Hidden layer sizes come as a comma separated string such as 100,100
  if FLAGS.dnn_hidden_units:
    dnn_hidden_units = [int(size) for size in FLAGS.dnn_hidden_units.split(",")]
  else:
    dnn_hidden_units = []

  # loss, model and optimizer
  criterion = nn.CrossEntropyLoss()
  input_dim, num_classes = 3 * 32 * 32, 10
  model = MLP(input_dim, dnn_hidden_units, num_classes)
  sgd = optim.SGD(
    model.parameters(), lr=FLAGS.learning_rate, weight_decay=0.001)

  # data: the test images are flattened once up front
  dataset = cifar10_utils.get_cifar10(FLAGS.data_dir)
  test_images = dataset['test'].images
  test_inputs = torch.tensor(test_images.reshape(test_images.shape[0], -1))

  accuracy_log, loss_log = [], []

  for step in range(FLAGS.max_steps):
    batch_x, batch_y = dataset['train'].next_batch(FLAGS.batch_size)
    inputs = torch.tensor(batch_x.reshape(FLAGS.batch_size, -1))
    # one-hot labels -> class indices
    labels = torch.tensor(batch_y.argmax(axis=1))

    sgd.zero_grad()
    batch_loss = criterion(model(inputs), labels)
    batch_loss.backward()
    sgd.step()
    loss_log.append(batch_loss.item())

    # only every eval_freq-th step is followed by an evaluation
    if step % FLAGS.eval_freq != 0:
      continue

    test_logits = model.forward(test_inputs).detach().numpy()
    test_acc = accuracy(test_logits, dataset['test'].labels)
    print('acc', test_acc, 'loss', batch_loss.item())
    accuracy_log.append(test_acc)

  # append the histories to the result files
  with open('accuracy_torch.txt', 'a') as f_acc:
    print(accuracy_log, file=f_acc)
  with open('loss_torch.txt', 'a') as f_loss:
    print(loss_log, file=f_loss)
示例#15
0
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP (constructed with bn_flag=True) with Adam (default learning
    rate, weight decay 1e-3) on CIFAR-10 mini-batches for FLAGS.max_steps
    steps. Every FLAGS.eval_freq steps, and once more after training, the
    model is evaluated on the whole test set. Loss and accuracy curves are
    saved as PNG files under 'results/'.

    Relies on names defined elsewhere in the module: FLAGS, MLP, accuracy,
    cifar10_utils and plt.
    """

    # DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [int(units) for units in FLAGS.dnn_hidden_units.split(",")]
    else:
        dnn_hidden_units = []

    def reshape_cifar10_mlp(x):
        # Flatten a 4-D batch (sample axis first) into (n_samples, n_features)
        # by moving the sample axis last, collapsing the rest and transposing.
        batch_size = x.shape[0]
        x = x.transpose([2, 3, 1, 0])
        x = x.reshape([-1, batch_size])
        x = x.transpose()
        return x

    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    def to_device(images, labels):
        # Flatten a numpy batch and move images and labels to `device`.
        images = torch.from_numpy(reshape_cifar10_mlp(images)).to(device)
        labels = torch.from_numpy(labels).to(device)
        return images, labels

    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
    x_train, y_train = to_device(*cifar10['train'].next_batch(FLAGS.batch_size))
    # the test set never changes, so it is prepared once
    x_eval, y_eval = to_device(cifar10['test'].images, cifar10['test'].labels)

    crossent_softmax = nn.CrossEntropyLoss()
    mlp = MLP(x_train.shape[1], dnn_hidden_units, y_train.shape[1], bn_flag=True)
    mlp.to(device)
    # NOTE: Adam runs with its default learning rate; FLAGS.learning_rate is
    # not used here.
    optimizer = torch.optim.Adam(mlp.parameters(), weight_decay=1e-3)

    def evaluate():
        # Return (loss, accuracy) on the whole test set as plain floats.
        # The model is put into eval mode for the pass (bn_flag presumably
        # enables batch normalisation, which must not use or update batch
        # statistics on test data) and no autograd graph is built.
        mlp.eval()
        with torch.no_grad():
            logits = mlp(x_eval)
            loss = crossent_softmax(logits, y_eval.argmax(dim=-1)).item()
            acc = float(accuracy(logits, y_eval))
        mlp.train()
        return loss, acc

    train_accs = []
    train_losses = []
    eval_accs = []
    eval_losses = []
    for i in np.arange(FLAGS.max_steps):
        print('\nStep: {}\n'.format(i))
        print('Training: ')
        optimizer.zero_grad()
        logits = mlp(x_train)
        loss_tensor = crossent_softmax(logits, y_train.argmax(dim=-1))
        # keep plain floats for logging/plotting so the autograd graph is not
        # kept alive by the history lists
        train_loss = loss_tensor.item()
        train_acc = float(accuracy(logits, y_train))
        print('loss: {:.4f}, acc: {:.4f}\n'.format(train_loss, train_acc))

        loss_tensor.backward()
        optimizer.step()

        # fetch the batch for the next step
        x_train, y_train = to_device(*cifar10['train'].next_batch(FLAGS.batch_size))
        if i % FLAGS.eval_freq == 0:
            print('Evaluation: ')
            eval_loss, eval_acc = evaluate()

            train_losses.append(train_loss)
            train_accs.append(train_acc)
            eval_losses.append(eval_loss)
            eval_accs.append(eval_acc)
            print('loss: {:.4f}, acc: {:.4f}'.format(eval_loss, eval_acc))

    # final evaluation after the last training step
    print('Evaluation: ')
    eval_loss, eval_acc = evaluate()

    train_losses.append(train_loss)
    train_accs.append(train_acc)
    eval_losses.append(eval_loss)
    eval_accs.append(eval_acc)
    print('loss: {:.4f}, acc: {:.4f}'.format(eval_loss, eval_acc))

    print('Finished training.')

    # make sure the output directory exists before saving the figures
    import os
    os.makedirs('results', exist_ok=True)

    plt.figure(figsize=(10, 5))
    plt.plot(np.arange(len(train_losses)), train_losses, label='training loss')
    plt.plot(np.arange(len(eval_losses)), eval_losses, label='evaluation loss')
    plt.ylim(0, 3)
    plt.legend()
    plt.xlabel('Iterations [x{}]'.format(FLAGS.eval_freq))
    plt.savefig('results/mlp_loss_torch_adam_layers_maxstep_reg_batch.png', bbox_inches='tight')

    plt.figure(figsize=(10, 5))
    plt.plot(np.arange(len(train_accs)), train_accs, label='training accuracy')
    plt.plot(np.arange(len(eval_accs)), eval_accs, label='evaluation accuracy')
    plt.legend()
    plt.xlabel('Iterations [x{}]'.format(FLAGS.eval_freq))
    plt.savefig('results/mlp_acc_torch_adam_layers_maxstep_reg_batch.png', bbox_inches='tight')
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP with plain SGD on CIFAR-10 mini-batches for FLAGS.max_steps
    steps. At every FLAGS.eval_freq-th step and at the last step the model is
    evaluated batch-wise on the batches yielded by cifar_test_generator, and
    the per-batch results are averaged. Loss and accuracy curves are saved
    and shown at the end.

    Relies on names defined elsewhere in the module: FLAGS, MLP, accuracy,
    cifar10_utils, cifar_test_generator, plt and the history lists
    train_overall_loss, train_overall_accuracy, train_x_axis,
    test_overall_loss, test_overall_accuracy and test_x_axis, which this
    function appends to.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # select which device to train the model on
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    # compute the input size of the MLP
    input_size, n_classes = 3 * 32 * 32, 10

    # init model, define the dataset, loss function and optimizer
    model = MLP(input_size, dnn_hidden_units, n_classes, FLAGS.b).to(device)
    dataset = cifar10_utils.get_cifar10(FLAGS.data_dir)
    loss_fn = torch.nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=FLAGS.learning_rate)

    for step in range(FLAGS.max_steps):
        X_train, y_train = dataset['train'].next_batch(FLAGS.batch_size)
        optimizer.zero_grad()

        # move to correct device and shape for MLP
        X_train = torch.tensor(X_train).reshape(
            -1, input_size).float().to(device)
        y_train = torch.tensor(y_train).float().to(device)

        predictions = model(X_train)
        train_loss = loss_fn(predictions, y_train.argmax(1).long())

        train_loss.backward()
        optimizer.step()

        # add the loss and accuracy to the lists for plotting
        train_overall_loss.append(train_loss.cpu().detach().sum())
        train_overall_accuracy.append(
            accuracy(predictions.cpu().detach(),
                     y_train.cpu().detach()))
        train_x_axis.append(step)

        # test the model when eval freq is reached or if it is the last step
        if not step % FLAGS.eval_freq or step + 1 == FLAGS.max_steps:
            model.eval()
            test_accuracies, test_losses_list = [], []

            # test batch-wise (the whole test set may not fit on the GPU);
            # no autograd graph is needed, which also keeps memory low
            with torch.no_grad():
                for X_test, y_test in cifar_test_generator(dataset):
                    # -1 lets a final batch be smaller than the others
                    X_test = torch.tensor(X_test).reshape(
                        -1, input_size).float().to(device)
                    y_test = torch.tensor(y_test).float().to(device)

                    test_predictions = model(X_test)
                    test_loss = loss_fn(test_predictions,
                                        y_test.argmax(1).long())
                    test_accuracy = accuracy(test_predictions, y_test)

                    # collect per-batch values for the averages below
                    test_accuracies.append(test_accuracy.cpu().detach())
                    test_losses_list.append(test_loss.cpu().detach().sum())

            # unweighted means over the test batches
            avg_test_accuracy = sum(test_accuracies) / len(test_accuracies)
            avg_test_loss = sum(test_losses_list) / len(test_losses_list)

            # report the averaged test loss, not the loss of the last batch
            print(
                "[{:5}/{:5}] Train loss {:.5f} Test loss {:.5f} Test accuracy {:.5f}"
                .format(step, FLAGS.max_steps, train_loss.item(),
                        avg_test_loss, avg_test_accuracy))

            test_overall_accuracy.append(avg_test_accuracy)
            test_overall_loss.append(avg_test_loss)
            test_x_axis.append(step)

            model.train()

    plt.plot(train_x_axis, train_overall_loss, label="Avg Train loss")
    plt.plot(test_x_axis, test_overall_loss, label="Avg Test loss")
    plt.legend()
    plt.savefig("pytorch_loss_curve")
    plt.show()

    plt.plot(train_x_axis,
             train_overall_accuracy,
             label="Train batch accuracy")
    plt.plot(test_x_axis, test_overall_accuracy, label="Test set accuracy")
    plt.legend()
    plt.savefig("pytorch_accuracy_curve")
    plt.show()
def train():
  """
  Performs training and evaluation of MLP model.

  Trains an MLP with Adam on CIFAR-10 mini-batches for FLAGS.max_steps
  iterations, calls evaluate(...) every FLAGS.eval_freq iterations and shows
  plots of the training loss (per batch and averaged over the most recent
  batches) and of the collected evaluation results.

  Relies on names defined elsewhere in the module: FLAGS, device, MLP,
  evaluate, cifar10_utils, optim and plt.
  """

  ### DO NOT CHANGE SEEDS!
  # Set the random seeds for reproducibility
  np.random.seed(42)
  torch.manual_seed(42)

  ## Prepare all functions
  # Get number of units in each hidden layer specified in the string such as 100,100
  if FLAGS.dnn_hidden_units:
    dnn_hidden_units = [int(units) for units in FLAGS.dnn_hidden_units.split(",")]
  else:
    dnn_hidden_units = []

  ########################
  # PUT YOUR CODE HERE  #
  #######################
  net = MLP(3072, dnn_hidden_units, 10)
  net.to(device)
  criterion = nn.CrossEntropyLoss()
  optimizer = optim.Adam(net.parameters(), lr = FLAGS.learning_rate)

  #Load cifar10
  cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
  print()
  print()
  print("----------------------------------------------")
  print("\t \t Training")
  print("----------------------------------------------\n")
  pl_loss = []       # loss of every training batch
  average_loss = []  # mean over the most recent (up to 99) batch losses
  acc = []           # results returned by evaluate(...)
  for iter_ in np.arange(0, FLAGS.max_steps):

    #Load batches
    x , y = cifar10['train'].next_batch(FLAGS.batch_size)

    # one-hot labels -> class indices
    labels = np.argmax(y, axis=1)

    # flatten each image into a vector; the batch size is taken from the
    # data so any FLAGS.batch_size works, not just 200
    x = np.reshape(x, (x.shape[0], -1))
    inputs = torch.from_numpy(x).to(device)
    labels = torch.from_numpy(labels).long().to(device)

    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    outputs = net(inputs)
    print("output: {}, labels:{}".format(outputs.size(),labels.size()))
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # print statistics
    running_loss = loss.item()
    pl_loss.append(running_loss)
    average_loss.append(np.mean(pl_loss[:-100:-1]))
    print("iter: {} | training loss: {} ".format(iter_,"%.3f"%running_loss))

    if (iter_+1)%FLAGS.eval_freq==0:
      net.eval()
      acc.append(evaluate(net, cifar10, FLAGS.batch_size))
      # switch back, otherwise all remaining steps would train in eval mode
      net.train()

  #######################
  # END OF YOUR CODE    #
  #######################

  plt.plot(pl_loss,'r-', label="Batch loss", alpha=0.5)
  plt.plot(average_loss,'g-', label="Average loss", alpha=0.5)
  plt.legend()
  plt.xlabel("Iterations")
  plt.ylabel("Loss")
  plt.title("Training Loss")
  plt.grid(True)
  plt.show()
  plt.close()

  plt.plot(acc,'g-', alpha=0.5)
  plt.xlabel("Iterations")
  plt.ylabel("Accuracy")
  plt.title("Test Accuracy")
  plt.grid(True)
  plt.show()
  plt.close()
  print()
  print("TRAINING COMPLETED")
示例#18
0
def train():
  """
  Performs training and evaluation of MLP model.

  Trains an MLP with Adam on CIFAR-10 mini-batches for FLAGS.max_steps steps.
  Every FLAGS.eval_freq steps it records the accuracy of the current training
  batch, the training loss averaged over the last FLAGS.eval_freq steps, and
  the loss/accuracy on ONE test batch of FLAGS.batch_size samples. After
  training, the accuracy on the whole test set is printed and the recorded
  curves are shown.

  Relies on names defined elsewhere in the module: FLAGS, MLP, accuracy,
  cifar10_utils and plt.
  """

  ### DO NOT CHANGE SEEDS!
  # Set the random seeds for reproducibility
  np.random.seed(42)

  ## Prepare all functions
  # Get number of units in each hidden layer specified in the string such as 100,100
  if FLAGS.dnn_hidden_units:
    dnn_hidden_units = [int(units) for units in FLAGS.dnn_hidden_units.split(",")]
  else:
    dnn_hidden_units = []

  ########################
  # PUT YOUR CODE HERE  #
  #######################

  # prepare input data; the input size is the product of the three
  # per-sample image dimensions, whatever their order
  cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)
  _, dim_a, dim_b, dim_c = cifar10['train']._images.shape
  _, n_outputs = cifar10['train']._labels.shape
  n_inputs = dim_a * dim_b * dim_c

  network = MLP(n_inputs,dnn_hidden_units,n_outputs)

  optimizer = torch.optim.Adam(network.parameters(), lr=FLAGS.learning_rate) # or SGD?
  loss_fn = nn.CrossEntropyLoss()

  train_losses, train_acc, test_losses, test_acc = [], [], [], []
  current_loss = 0.0

  for step in range(FLAGS.max_steps):
      network.train()
      optimizer.zero_grad()

      x, y = cifar10['train'].next_batch(FLAGS.batch_size)
      # gradients are only needed for the parameters, not for the inputs
      x, y = torch.tensor(x), torch.tensor(y, dtype=torch.float)
      x = x.reshape(x.shape[0], -1)

      output = network(x)
      # one-hot labels -> class indices
      labels = torch.max(y,1)[1]

      loss = loss_fn(output, labels)
      loss.backward()
      optimizer.step()
      current_loss += loss.item()

      if (step+1) % FLAGS.eval_freq == 0:
          train_acc.append(accuracy(output, y))
          # average training loss since the previous evaluation
          train_losses.append(current_loss / float(FLAGS.eval_freq))
          current_loss = 0.0

          x_test, y_test = cifar10['test'].next_batch(FLAGS.batch_size)
          x_test, y_test = torch.tensor(x_test), torch.tensor(y_test, dtype=torch.float)
          x_test = x_test.reshape(x_test.shape[0], -1)

          # evaluate in eval mode without building an autograd graph;
          # network.train() at the top of the loop restores training mode
          network.eval()
          with torch.no_grad():
              output_test = network(x_test)
              test_losses.append(loss_fn(output_test, torch.max(y_test,1)[1]).item())
          test_acc.append(accuracy(output_test, y_test))

          print("Step {}".format(step))

  # final accuracy on the whole test set
  size_test = cifar10['test']._num_examples
  x, y = cifar10['test'].next_batch(size_test)
  x, y = torch.tensor(x), torch.tensor(y, dtype=torch.float)
  x = x.reshape(size_test, -1)

  network.eval()
  with torch.no_grad():
      out = network(x)
  print("Accuracy: {}".format(accuracy(out, y)))

  # plot graph of accuracies
  plt.subplot(211)
  plt.plot(test_acc, label="test accuracy")
  plt.plot(train_acc, label="training accuracy")
  plt.title('Accuracy')
  plt.legend()

  plt.subplot(212)
  plt.plot(test_losses, label = "test loss")
  plt.plot(train_losses, label = "training loss")
  plt.title('Cross-entropy loss')
  plt.legend()

  plt.show()
示例#19
0
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP on mini-batches from the CIFAR-10 training split with Adam.
    The optimised loss is cross-entropy plus ``alpha`` times the sum of the
    L2 norms of all parameters. Every ``eval_freq`` iterations the whole test
    set is evaluated and a log line is printed; the log file and the plot are
    only written when the local ``SAVE_LOGS`` / ``SAVE_PLOTS`` switches are on.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
    else:
        dnn_hidden_units = []

    ############################## VARIABLES ##############################

    SAVE_PLOTS = False
    SAVE_LOGS = False

    img_size = 32
    n_classes = 10
    input_size = img_size * img_size * 3
    batch_size = FLAGS.batch_size
    eval_freq = FLAGS.eval_freq
    n_iterations = FLAGS.max_steps
    lr_rate = FLAGS.learning_rate

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Device:", device)

    ############################## METHODS ##############################

    def test():
        """Evaluate the network on the full test set; returns (accuracy, loss)."""
        net.eval()

        # No gradients are needed for evaluation; skipping the autograd graph
        # avoids holding activations for the entire test set in memory.
        with torch.no_grad():
            output_t = net(x_t)
            # Store a plain float so the history can be plotted from any device.
            loss_t = criterion(output_t, y_t).item()
            acc_t = accuracy(output_t, y_t_onehot)

        return acc_t, loss_t

    def plot(iteration):
        """Redraw the accuracy/loss curves up to ``iteration`` and save them as PNG."""
        idx_test = list(range(0, iteration + 1, eval_freq))
        idx = list(range(0, iteration + 1))

        plt.clf()
        plt.cla()
        plt.subplot(1, 2, 1)
        plt.plot(idx_test, test_accuracies, "k-", linewidth=1, label="test")
        plt.plot(idx, accuracies, "r-", linewidth=0.5, alpha=0.5, label="train")
        plt.xlabel('iteration')
        plt.ylabel('accuracy')
        plt.legend()

        plt.subplot(1, 2, 2)
        plt.plot(idx_test, test_losses, "k-", linewidth=1, label="test")
        plt.plot(idx, losses, "r-", linewidth=0.5, alpha=0.5, label="train")
        plt.xlabel('iteration')
        plt.ylabel('loss')
        plt.legend()
        # NOTE: the output directory is not created here; it must already exist.
        plt.savefig("./out/plot/plot_pytorch_" + str(batch_size) + "_" + str(lr_rate) + ".png", bbox_inches='tight')

    def to_label(tensor):
        """Convert one-hot rows to class indices (argmax along dim 1)."""
        _, indices = tensor.max(1)
        return indices

    ############################## MAIN ##############################

    cifar10 = cifar10_utils.get_cifar10('cifar10/cifar-10-batches-py')

    net = MLP(input_size, dnn_hidden_units, n_classes)
    net.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=lr_rate)

    losses = []
    accuracies = []
    test_accuracies = []
    test_losses = []
    alpha = 0.0001  # weight of the parameter-norm penalty

    # The whole test set is flattened and moved to the device once, up front.
    x_t = cifar10['test'].images
    y_t = cifar10['test'].labels
    x_t = torch.from_numpy(x_t.reshape(-1, input_size))
    y_t_onehot = torch.from_numpy(y_t).type(torch.LongTensor)
    y_t = to_label(y_t_onehot)
    x_t, y_t = x_t.to(device), y_t.to(device)
    y_t_onehot = y_t_onehot.to(device)

    plt.figure(figsize=(10, 4))

    for i in range(n_iterations):

        x, y = cifar10['train'].next_batch(batch_size)
        x = torch.from_numpy(x.reshape(-1, input_size))
        y_onehot = torch.from_numpy(y).type(torch.LongTensor)
        y = to_label(y_onehot)
        x, y = x.to(device), y.to(device)
        y_onehot = y_onehot.to(device)

        optimizer.zero_grad()
        output = net(x)
        train_loss = criterion(output, y)

        # Penalty term: sum of the (unsquared) L2 norms of every parameter tensor.
        reg_loss = 0
        for param in net.parameters():
            reg_loss += param.norm(2)

        loss = train_loss + alpha * reg_loss
        loss.backward()
        optimizer.step()

        losses.append(loss.item())
        accuracies.append(accuracy(output.detach().data, y_onehot.detach()))

        # Release the batch before the (large) test evaluation below.
        del x, y

        if i % eval_freq == 0:
            acc_t, loss_t = test()
            test_accuracies.append(acc_t)
            test_losses.append(loss_t)

            log_string = "[{:5d}/{:5d}] Test Accuracy: {:.4f} | Batch Accuracy: {:.4f} | Batch Loss: {:.6f} | Train/Reg: {:.6f}/{:.6f}\n".format(
                i, n_iterations, test_accuracies[-1], accuracies[-1], loss, train_loss, reg_loss * alpha
            )
            print(log_string)

            if SAVE_LOGS:
                with open("./out/log/pytorch_log_" + str(batch_size) + "_" + str(lr_rate) + ".txt", "a") as myfile:
                    myfile.write(log_string)

            if SAVE_PLOTS:
                plot(i)

            # test() switched the network to eval mode; switch back for training.
            net.train()
示例#20
0
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP on CIFAR-10 mini-batches with the optimizer named by
    ``FLAGS.optimizer`` ('SGD', 'Adam' or 'Adadelta'). At every ``eval_freq``
    steps (and on the last step) the current batch metrics and the metrics on
    the whole test set are recorded. The recorded rows are appended to
    ``../mlp_pytorch_results/mlp_pytorch.csv`` (relative to the working
    directory) as ';'-separated values.

    Returns:
        list: one ``[step, train_acc, train_loss, test_acc, test_loss]`` row
        per evaluation.

    Raises:
        ValueError: if ``FLAGS.optimizer`` is not one of the supported names.
    """
    print_flags()
    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    # use GPU if available
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    lr = FLAGS.learning_rate
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    eval_freq = FLAGS.eval_freq
    data_dir = FLAGS.data_dir
    optim_type = FLAGS.optimizer
    # Training stops once two consecutive test losses differ by less than this.
    train_treshold = 1e-6

    # load input data
    cifar10 = cifar10_utils.get_cifar10(data_dir, one_hot=True)

    # get test data
    x_test = cifar10["test"].images
    y_test = cifar10["test"].labels
    train_data = cifar10["train"]

    # determine dimension of data
    x_dim = x_test.shape
    n_test_samples = x_dim[0]  # number of test samples
    n_inputs = x_dim[1] * x_dim[2] * x_dim[3]  # flattened size of one image
    n_classes = y_test.shape[1]

    # reshape test images to fit MLP input and move them to the device once
    x_test = x_test.reshape((n_test_samples, n_inputs))
    x_test_torch = torch.tensor(x_test, dtype=torch.float, device=device)
    y_test_torch = torch.tensor(y_test, dtype=torch.float, device=device)
    y_test_idx = y_test_torch.argmax(dim=1)

    #initialize MLP model
    mlp_model = MLP(n_inputs=n_inputs,
                    n_hidden=dnn_hidden_units,
                    n_classes=n_classes).to(device)

    if optim_type == 'SGD':
        optimizer = torch.optim.SGD(mlp_model.parameters(), lr=lr)
    elif optim_type == 'Adam':
        optimizer = torch.optim.Adam(mlp_model.parameters(), lr=lr)
    elif optim_type == 'Adadelta':
        optimizer = torch.optim.Adadelta(mlp_model.parameters(), lr=lr)
    else:
        # Previously an unknown name left `optimizer` unbound and crashed
        # later with an unrelated-looking NameError.
        raise ValueError(
            "Unsupported optimizer '{0}'; expected 'SGD', 'Adam' or "
            "'Adadelta'".format(optim_type))

    #define loss function
    loss_fn = nn.CrossEntropyLoss()

    # evaluation metrics
    acc_train = []
    acc_test = []
    loss_train = []
    loss_test = []
    best_acc = 0.0
    results = []

    #train the model
    print("Start training")
    for step in range(max_steps):

        #get mini-batch
        x_train, y_train = train_data.next_batch(batch_size)
        x_train = x_train.reshape((batch_size, n_inputs))

        #transform to tensor representation
        x_train_torch = torch.tensor(x_train, dtype=torch.float, device=device)
        y_train_torch = torch.tensor(y_train, dtype=torch.float, device=device)

        #set gradients to zero
        optimizer.zero_grad()

        #forward pass mb to get predictions as output
        out = mlp_model(x_train_torch)

        #compute loss (CrossEntropyLoss expects class indices, not one-hot)
        loss_mb = loss_fn(out, y_train_torch.argmax(dim=1))

        #backward pass
        loss_mb.backward()
        optimizer.step()

        if (step % eval_freq == 0) or (step == max_steps - 1):
            print(f"Step: {step}")
            # compute and store training metrics
            loss_train.append(loss_mb.item())
            acc_train.append(accuracy(out, y_train_torch))
            print("TRAIN acc: {0:.4f}  & loss: {1:.4f}".format(
                acc_train[-1], loss_train[-1]))

            # compute and store test metrics
            # Note that we use the test set as validation set!! Only as an exception :P
            # Evaluation needs no gradients; no_grad avoids building a graph
            # over the entire test set.
            with torch.no_grad():
                out_test = mlp_model(x_test_torch)
                loss_val = loss_fn(out_test, y_test_idx)
            loss_test.append(loss_val.item())
            acc_test.append(accuracy(out_test, y_test_torch))
            print("TEST acc: {0:.4f}  & loss: {1:.4f}".format(
                acc_test[-1], loss_test[-1]))

            results.append([
                step, acc_train[-1], loss_train[-1], acc_test[-1],
                loss_test[-1]
            ])

            if acc_test[-1] > best_acc:
                best_acc = acc_test[-1]
                print("New BEST acc: {0:.4f}".format(best_acc))

            # Early stop: after more than 20 evaluations, stop when the test
            # loss has effectively stopped changing between evaluations.
            if len(loss_train) > 20:
                prev_losses = loss_test[-2]
                cur_losses = loss_test[-1]
                if abs(prev_losses - cur_losses) < train_treshold:
                    print("Training stopped early at step {0}".format(step +
                                                                      1))
                    break
    print("Finished training")
    print("BEST acc: {0:.4f}".format(best_acc))

    res_path = Path.cwd().parent / 'mlp_pytorch_results'
    res_path.mkdir(parents=True, exist_ok=True)

    print("Saving results to {0}".format(res_path))

    res_path = res_path / 'mlp_pytorch.csv'

    # Append to an existing file; write the header only for a new one.
    mode = 'a' if res_path.exists() else 'w'

    col_names = [
        'step', 'train_acc', 'train_loss', 'test_acc', 'test_loss', 'lr',
        'max_steps', 'batch_size', 'dnn_hidden_units', 'optimizer'
    ]

    with open(res_path, mode) as csv_file:
        if mode == 'w':
            # Header and rows share one delimiter so the file parses as a table.
            csv_file.write(';'.join(col_names) + '\n')
        for row in results:
            fields = [*row, lr, max_steps, batch_size, dnn_hidden_units,
                      optim_type]
            # One value per header column; the old code fused test_loss with
            # lr (missing ';') and emitted a trailing delimiter.
            csv_file.write(';'.join(f'{value}' for value in fields) + '\n')

    return results
示例#21
0
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP on CIFAR-10 mini-batches with Adam and cross-entropy loss.
    Every input batch is first passed through a ``BatchNorm2d(3)`` layer that
    is not part of the optimised parameters. Every ``FLAGS.eval_freq`` steps
    the whole test set is evaluated; the test accuracy and the mean training
    loss since the previous evaluation are printed. Accuracy and loss are also
    sent to the ``writer`` object defined outside this function.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)

    # This first batch is never used for training. The call is kept so that
    # the sequence of batches drawn in the loop below stays unchanged.
    cifar10['train'].next_batch(FLAGS.batch_size)

    MLP_net = MLP(n_inputs=1 * 3 * 32 * 32,
                  n_hidden=dnn_hidden_units,
                  n_classes=10)

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(MLP_net.parameters(), lr=FLAGS.learning_rate)
    print(MLP_net)

    # Input normalisation layer. It is never switched to eval mode, so it
    # normalises each batch (train or test) with that batch's own statistics.
    batch_norm = torch.nn.BatchNorm2d(3)

    # Plain floats only: storing the loss tensors themselves would keep every
    # step's autograd graph alive until the next evaluation.
    loss_list = []
    for step in range(FLAGS.max_steps):
        # Get batch, normalise it and reshape input to vector
        x_train, y_train = cifar10['train'].next_batch(FLAGS.batch_size)
        x_train = batch_norm(torch.from_numpy(x_train)).detach()
        x_train = x_train.reshape(FLAGS.batch_size, -1)

        net_output = MLP_net(x_train)

        batch_accuracy = accuracy(net_output.detach().numpy(), y_train)

        # CrossEntropyLoss expects class indices, so take the argmax of the
        # one-hot labels.
        y_train = torch.from_numpy(y_train).type(torch.LongTensor)
        loss = criterion(net_output, torch.max(y_train, 1)[1])
        loss_list.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (step + 1) % FLAGS.eval_freq == 0:
            x_test, y_test = cifar10['test'].images, cifar10['test'].labels
            # Evaluation needs no gradients; no_grad avoids building a graph
            # over the entire test set.
            with torch.no_grad():
                x_test = batch_norm(torch.from_numpy(x_test))
                x_test = x_test.reshape(x_test.shape[0], -1)
                net_test_output = MLP_net(x_test)
            # Compute the test accuracy once and reuse it for print and log.
            test_accuracy = accuracy(net_test_output.numpy(), y_test)
            print("test set accuracy for step " + str(step + 1) + " : " +
                  str(test_accuracy))
            print("loss : ", sum(loss_list) / len(loss_list))
            loss_list = []
            writer.add_scalar('Test_accuracy', test_accuracy, step)

        writer.add_scalar('Train_accuracy', batch_accuracy, step)
        writer.add_scalar('Train_loss', loss.item(), step)
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP on CIFAR-10 mini-batches with Adam and cross-entropy loss.
    Every ``FLAGS.eval_freq`` steps, accuracy and loss are computed on the
    training set (in chunks of 500 samples, averaged) and on the whole test
    set. After training, the accuracy and loss curves are saved to
    ``accuracy_mlp.jpg`` and ``loss_mlp.jpg``.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)
    torch.manual_seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    # A separate dataset object is used to measure accuracy/loss on the train
    # set in chunks. NOTE(review): presumably kept apart from the training
    # copy so next_batch() cannot affect these arrays; confirm in cifar10_utils.
    batch_size_acc = 500
    data_accuracy_loss = cifar10_utils.get_cifar10(data_dir=FLAGS.data_dir)
    X_train_acc = data_accuracy_loss['train'].images
    y_train_acc = data_accuracy_loss['train'].labels
    X_train_acc = np.reshape(X_train_acc, (X_train_acc.shape[0], -1))
    # Number of complete evaluation chunks; a trailing partial chunk is dropped.
    steps_train = int(X_train_acc.shape[0] / batch_size_acc)

    #loading data for training
    data = cifar10_utils.get_cifar10(data_dir=FLAGS.data_dir)
    train_images = data['train'].images
    n_classes = data['train'].labels.shape[1]
    n_inputs = (train_images.shape[1] * train_images.shape[2] *
                train_images.shape[3])
    batch_size = FLAGS.batch_size
    m_steps = FLAGS.max_steps
    alpha = FLAGS.learning_rate

    mlp = MLP(n_inputs, dnn_hidden_units, n_classes)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(mlp.parameters(), lr=alpha)

    X_test, y_test = data['test'].images, data['test'].labels
    X_test = np.reshape(X_test, (X_test.shape[0], -1))
    X_test = torch.from_numpy(X_test)
    y_test = torch.LongTensor(y_test)

    x_ax = []
    acc_train = []
    acc_test = []
    loss_train = []
    loss_test = []

    for step in range(m_steps):

        x, y = data['train'].next_batch(batch_size)
        n = x.shape
        x = x.reshape([n[0], n[1] * n[2] * n[3]])
        x = torch.from_numpy(x)

        y_pred = mlp(x)
        labels = torch.LongTensor(y)

        # CrossEntropyLoss expects class indices: argmax of the one-hot rows.
        loss = criterion(y_pred, torch.max(labels, 1)[1])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if step % FLAGS.eval_freq == 0:
            print('Iteration ', step)

            x_ax.append(step)

            acc_ = []
            loss_ = []
            # Evaluation needs no gradients; no_grad avoids building autograd
            # graphs for the full train and test passes.
            with torch.no_grad():
                for i in range(steps_train):
                    lo = i * batch_size_acc
                    hi = lo + batch_size_acc
                    x_acc = torch.from_numpy(X_train_acc[lo:hi])
                    y_acc = torch.LongTensor(y_train_acc[lo:hi])

                    chunk_pred = mlp(x_acc)
                    acc_.append(accuracy(chunk_pred, y_acc))
                    loss_.append(
                        float(criterion(chunk_pred, torch.max(y_acc, 1)[1])))

                predictions = mlp(X_test)
                loss_te = criterion(predictions, torch.max(y_test, 1)[1])

            acc_train.append(np.mean(acc_))
            loss_train.append(np.mean(loss_))

            acc_test.append(accuracy(predictions, y_test))
            loss_test.append(float(loss_te))

            print('Max train accuracy ', max(acc_train))
            print('Max test accuracy ', max(acc_test))
            print('Min train loss ', min(loss_train))
            print('Min test loss ', min(loss_test))

    x_ax = np.array(x_ax)
    acc_test = np.array(acc_test)
    acc_train = np.array(acc_train)
    loss_test = np.array(loss_test)
    loss_train = np.array(loss_train)

    print('Max train accuracy ', max(acc_train))
    print('Max test accuracy ', max(acc_test))
    print('Min train loss ', min(loss_train))
    print('Min test loss ', min(loss_test))

    # Accuracy curves
    plt.figure()
    ax = plt.axes()

    plt.title("MLP Pytorch. Accuracy curves")
    ax.plot(x_ax, acc_train, label='train')
    ax.plot(x_ax, acc_test, label='test')
    ax.set_xlabel('Step')
    ax.set_ylabel('Accuracy')
    plt.legend()
    plt.savefig('accuracy_mlp.jpg')

    # Loss curves (y axis clipped to [1, 10])
    plt.figure()
    ax = plt.axes()
    plt.title("MLP Pytorch. Loss curves")
    ax.plot(x_ax, loss_train, label='train')
    ax.plot(x_ax, loss_test, label='test')
    ax.set_xlabel('Step')
    ax.set_ylabel('Loss')
    ax.set_ylim(top=10, bottom=1)
    plt.legend()
    plt.savefig('loss_mlp.jpg')
示例#23
0
def train():
  """
  Performs training and evaluation of MLP model.

  Trains an MLP on CIFAR-10 mini-batches with SGD and cross-entropy loss.
  Metrics are written to ``results.dat``: a '#'-prefixed header with the
  hyperparameters, one row recorded before the first weight update, and one
  row every ``FLAGS.eval_freq`` batches. Each row evaluates the whole test set.
  """

  ### DO NOT CHANGE SEEDS!
  # Set the random seeds for reproducibility
  np.random.seed(42)

  ## Prepare all functions
  # Get number of units in each hidden layer specified in the string such as 100,100
  if FLAGS.dnn_hidden_units:
    dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
    dnn_hidden_units = [int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units]
  else:
    dnn_hidden_units = []

  # Get negative slope parameter for LeakyReLU
  neg_slope = FLAGS.neg_slope
  learning_rate = FLAGS.learning_rate
  batch_size = FLAGS.batch_size
  max_steps = FLAGS.max_steps

  device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

  cifar10 = cifar10_utils.get_cifar10(FLAGS.data_dir)

  x_test, t_test = cifar10["test"].images, cifar10["test"].labels
  x_test = torch.tensor(x_test.reshape(x_test.shape[0], N_INPUTS)).to(device)
  # Column index of the 1 in each one-hot row, i.e. the class index.
  t_test_indx = torch.tensor(np.where(t_test == 1)[1]).to(device)

  mlp = MLP(N_INPUTS, dnn_hidden_units, N_CLASSES, neg_slope).to(device)

  crossEntropy = nn.CrossEntropyLoss()

  optimizer = optim.SGD(mlp.parameters(), lr=learning_rate)

  if torch.cuda.device_count() > 1:
    # The wrapper must replace the model to take effect; the previous code
    # discarded it. The optimizer still references the same parameters.
    mlp = nn.DataParallel(mlp)

  def evaluate(y_batch, t_batch):
    """Return (train_acc, test_acc, test_loss) for the given batch outputs."""
    train_acc = accuracy(y_batch, t_batch)
    # Evaluation needs no gradients; no_grad avoids building a graph over
    # the entire test set.
    with torch.no_grad():
      y_test = mlp(x_test)
      test_loss = crossEntropy(y_test, t_test_indx)
    test_acc = accuracy(y_test, t_test)
    return train_acc, test_acc, test_loss.item()

  # The context manager closes the file even if training raises.
  with open("results.dat", "w+") as results:
    results.write(
        "#torch_mlp \n#neg_slope : {}\n#learning_rate : {}\n#batch_size : {}\n"
        "#hidden_units : {}\n#max_steps : {}\n".format(
            neg_slope, learning_rate, batch_size, dnn_hidden_units, max_steps))
    results.write("#GPUs : {}\n".format(torch.cuda.device_count())) #show no of available gpus
    results.write("#epoch batch max_steps loss train_acc test_acc test_loss\n")

    for batch in range(1, max_steps + 1):

      optimizer.zero_grad()

      x, t = cifar10["train"].next_batch(batch_size)
      # Gradients w.r.t. the input are never used, so none are requested.
      x = torch.tensor(x.reshape(batch_size, N_INPUTS)).to(device)
      t_indx = torch.tensor(np.where(t == 1)[1]).to(device) #shape: (batch_size,)

      y = mlp(x) #y predictions, t targets
      loss = crossEntropy(y, t_indx) #includes softmax

      #accuracy before updating (recorded as batch 0)
      if batch == 1:
        train_acc, test_acc, test_loss = evaluate(y, t)
        results.write("%d %d %d %.3f %.3f %.3f %.3f\n" %
            (cifar10["train"]._epochs_completed, 0, max_steps, loss.item(), train_acc, test_acc, test_loss))

      #update weights
      loss.backward()
      optimizer.step()

      if batch % FLAGS.eval_freq == 0:
        # y and loss come from the forward pass made before this update;
        # only the test metrics use the updated weights.
        train_acc, test_acc, test_loss = evaluate(y, t)
        results.write("%d %d %d %.3f %.3f %.3f %.3f\n" %
            (cifar10["train"]._epochs_completed, batch, max_steps, loss.item(), train_acc, test_acc, test_loss))
示例#24
0
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP on CIFAR-10 mini-batches with SGD (learning rate, weight
    decay and momentum taken from ``FLAGS``) and cross-entropy loss. The test
    set is evaluated every ``FLAGS.eval_freq`` steps and on the last step.
    The model and the pickled loss/accuracy histories are saved under
    ``../models/<run name>/``, and the final test accuracy is printed.
    """

    # DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    # Preparation for training
    print('- Init parameters')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    data = cifar10_utils.get_cifar10(FLAGS.data_dir)
    train_data = data['train']
    test_data = data['test']
    # The three dimensions of one image; only their product is used.
    w, h, d = train_data.images[0].shape
    n_classes = train_data.labels[0].shape[0]

    criterion = nn.CrossEntropyLoss()
    model = MLP(w * h * d, dnn_hidden_units, n_classes).to(device)
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=FLAGS.learning_rate,
                                weight_decay=FLAGS.weight_decay,
                                momentum=FLAGS.momentum)

    train_losses = []
    test_losses = []
    test_accuracies = []

    # Train
    print('- Start Training')
    for step in range(FLAGS.max_steps):
        x_batch, x_labels = next_batch_in_tensors(train_data, FLAGS.batch_size,
                                                  device)

        optimizer.zero_grad()
        out = model(x_batch)
        loss = criterion(out, x_labels.argmax(dim=1))
        loss.backward()
        optimizer.step()

        # The loss is a 0-dim tensor; indexing it with [0] raises on current
        # PyTorch, so read the scalar with .item() directly.
        train_losses.append(loss.item())

        if (step % FLAGS.eval_freq == 0) or (step == FLAGS.max_steps - 1):
            # Test current
            test_x, test_labels = next_batch_in_tensors(
                test_data, test_data.num_examples, device)

            # Evaluation needs no gradients; no_grad avoids building a graph
            # over the entire test set.
            with torch.no_grad():
                out_test = model(test_x)
                loss_test = criterion(out_test, test_labels.argmax(dim=1))
                acc = accuracy(out_test, test_labels)

            test_losses.append(loss_test.item())
            test_accuracies.append(acc.item())

    # Save stuff
    filename = 'steps-{}_layers-{}_lr-{}_bs-{}'.format(FLAGS.max_steps,
                                                       FLAGS.dnn_hidden_units,
                                                       FLAGS.learning_rate,
                                                       FLAGS.batch_size)

    # Non-default SGD settings are appended so runs do not overwrite each other.
    if FLAGS.momentum != SGD_MOMENTUM_DEFAULT:
        filename += '_SGDmomentum-{}'.format(FLAGS.momentum)

    if FLAGS.weight_decay != SGD_WEIGHT_DECAY_DEFAULT:
        filename += '_SGDweightDecay-{}'.format(FLAGS.weight_decay)

    filepath = '../models/{}'.format(filename)
    os.makedirs(filepath, exist_ok=True)

    torch.save(model, '{}/model.pt'.format(filepath))

    with open('{}/train_loss'.format(filepath), 'wb+') as f:
        pickle.dump(train_losses, f)

    with open('{}/test_loss'.format(filepath), 'wb+') as f:
        pickle.dump(test_losses, f)

    with open('{}/accuracies'.format(filepath), 'wb+') as f:
        pickle.dump(test_accuracies, f)

    print(test_accuracies[-1])
示例#25
0
def train():
    """
    Performs training and evaluation of MLP model.

    Trains an MLP on CIFAR-10 mini-batches with cross-entropy loss and the
    optimizer selected by ``FLAGS.optimizer`` ('ADAM', 'ADAMwd', 'SGD', 'RMS';
    anything else falls back to SGD). Every ``FLAGS.eval_freq`` iterations and
    on the last iteration, loss and accuracy are recorded for the current
    training batch and for the whole test set. The curves are saved to
    ``../figures/pytorch_mlp.png`` and shown.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    ## Prepare all functions
    # Get number of units in each hidden layer specified in the string such as 100,100
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = FLAGS.dnn_hidden_units.split(",")
        dnn_hidden_units = [
            int(dnn_hidden_unit_) for dnn_hidden_unit_ in dnn_hidden_units
        ]
    else:
        dnn_hidden_units = []

    # Use a separate local name: assigning to DATA_DIR_DEFAULT here would make
    # it a local variable and raise UnboundLocalError when the flag is empty.
    data_dir = FLAGS.data_dir if FLAGS.data_dir else DATA_DIR_DEFAULT

    # Get negative slope parameter for LeakyReLU
    neg_slope = FLAGS.neg_slope

    batch_size = FLAGS.batch_size
    learning_rate = FLAGS.learning_rate

    cifar_data = cifar10_utils.get_cifar10(data_dir)

    train_data = cifar_data['train']
    test_data = cifar_data['test']

    n_classes = train_data.labels.shape[1]
    n_inputs = np.prod(train_data.images.shape[1:])

    x_test, y_test = test_data.images, test_data.labels
    x_test = torch.from_numpy(np.reshape(x_test, (x_test.shape[0], n_inputs)))
    y_test = torch.from_numpy(np.argmax(y_test, axis=1)).type(torch.LongTensor)

    criterion = nn.CrossEntropyLoss()
    model = MLP(n_inputs, dnn_hidden_units, n_classes, neg_slope)

    if FLAGS.optimizer == 'ADAM':
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    elif FLAGS.optimizer == 'ADAMwd':
        optimizer = optim.Adam(model.parameters(),
                               lr=learning_rate,
                               weight_decay=0.02)
    elif FLAGS.optimizer == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    elif FLAGS.optimizer == 'RMS':
        optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)
    else:
        print("Optimizer: Used default option, SGD")
        optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    # Train and Test losses
    losses = [[], []]
    # Train and Test accuracies
    accuracies = [[], []]

    # True iteration for plotting
    iterations = []

    for iteration in range(FLAGS.max_steps):
        x, y = train_data.next_batch(batch_size)
        x = torch.from_numpy(np.reshape(x, (batch_size, n_inputs)))
        # argmax in order to align labels with the Cross entropy loss function
        y = torch.from_numpy(np.argmax(y, axis=1)).type(torch.LongTensor)

        train_output = model(x)
        loss = criterion(train_output, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if iteration % FLAGS.eval_freq == 0 or iteration == FLAGS.max_steps - 1:
            iterations.append(iteration)

            # Second forward pass for test set; no gradients are needed.
            with torch.no_grad():
                test_output = model(x_test)
                test_loss = criterion(test_output, y_test).item()

            # Store plain floats: a tensor that requires grad cannot be
            # converted to a NumPy array for plotting. The batch loss above
            # is the same value the old code recomputed here.
            train_loss = loss.item()
            losses[0].append(train_loss)
            losses[1].append(test_loss)

            # Calculate accuracies
            train_acc = accuracy(train_output, y)
            test_acc = accuracy(test_output, y_test)
            accuracies[0].append(train_acc)
            accuracies[1].append(test_acc)

            print(
                "Iteration {}, Train loss: {}, Train accuracy: {}, Test accuracy: {}"
                .format(iteration, train_loss, train_acc, test_acc))

    fig = plt.figure(figsize=(25, 10), dpi=200)
    fig.suptitle('PyTorch MLP: Losses and Accuracies', fontsize=40)
    ax1 = fig.add_subplot(1, 2, 1)
    ax2 = fig.add_subplot(1, 2, 2)

    ax1.plot(iterations, losses[0], linewidth=4, color="g", label="Train loss")
    ax1.plot(iterations, losses[1], linewidth=4, color="c", label="Test loss")
    ax2.plot(iterations,
             accuracies[0],
             linewidth=4,
             color="g",
             label="Train accuracy")
    ax2.plot(iterations,
             accuracies[1],
             linewidth=4,
             color="c",
             label="Test accuracy")

    ax1.set_xlabel('$Iteration$', fontsize=28)
    ax1.set_ylabel('$Loss$', fontsize=28)
    ax2.set_xlabel('$Iteration$', fontsize=28)
    ax2.set_ylabel('$Accuracy$', fontsize=28)

    ax1.legend(fontsize=22)
    ax2.legend(fontsize=22)

    # NOTE: the output directory is not created here; it must already exist.
    plt.savefig("../figures/pytorch_mlp.png")
    plt.show()
def train():
    """
    Train an MLP on CIFAR-10 with plain SGD and evaluate it periodically.

    All settings are read from the module-level ``FLAGS`` object
    (``dnn_hidden_units``, ``neg_slope``, ``data_dir``, ``learning_rate``,
    ``max_steps``, ``batch_size``, ``eval_freq``). Every ``eval_freq`` steps
    the loss of the current training batch and the loss / accuracy on the
    whole test set are recorded. The records are printed when ``PRINTS`` is
    truthy and plotted after training when ``PLOTS`` is truthy.

    Returns:
        None.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Parse the comma-separated layer sizes, e.g. "100,100" -> [100, 100].
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # Negative slope parameter for LeakyReLU
    neg_slope = FLAGS.neg_slope

    data = cifar10_utils.get_cifar10(FLAGS.data_dir)
    # Named *_set so the locals do not shadow this function's own name.
    train_set = data['train']
    test_set = data['test']

    # Flattened size of one image: product of all non-batch dimensions.
    dim_x = int(np.prod(train_set.images.shape[1:]))
    # Labels are indexed as (n, classes) below, so shape[1] is the class count.
    n_classes = train_set.labels.shape[1]

    mlp = MLP(dim_x, dnn_hidden_units, n_classes, neg_slope)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(mlp.parameters(), lr=FLAGS.learning_rate)

    # One slot per completed evaluation interval.
    n_evals = int(FLAGS.max_steps // FLAGS.eval_freq)
    loss_train = np.zeros(n_evals)
    loss_test = np.zeros(n_evals)
    accuracy_test = np.zeros(n_evals)

    # The test set never changes, so build its tensors once up front.
    images_test = torch.from_numpy(
        np.reshape(test_set.images, (test_set.images.shape[0], dim_x)))
    labels_test = torch.from_numpy(np.argmax(test_set.labels, axis=1)).long()
    # Explicit num_classes keeps the one-hot width fixed instead of letting
    # it be inferred from the largest label that happens to be present.
    labels_test_onehot = F.one_hot(labels_test, num_classes=n_classes)

    for step in range(FLAGS.max_steps):
        if PRINTS:
            print('iter', step + 1, end='\r')

        images_np, labels_np = train_set.next_batch(FLAGS.batch_size)
        images = torch.from_numpy(
            np.reshape(images_np, (images_np.shape[0], dim_x)))
        # CrossEntropyLoss expects class indices, not one-hot rows.
        labels = torch.from_numpy(np.argmax(labels_np, axis=1)).long()

        optimizer.zero_grad()
        loss = criterion(mlp(images), labels)
        loss.backward()
        optimizer.step()

        if (step + 1) % FLAGS.eval_freq == 0:
            slot = step // FLAGS.eval_freq
            loss_train[slot] = loss.item()

            # No gradients are needed for evaluation; skipping the autograd
            # graph avoids holding activations for the entire test set.
            with torch.no_grad():
                pred_test = mlp(images_test)
                accuracy_test[slot] = accuracy(pred_test, labels_test_onehot)
                loss_test[slot] = criterion(pred_test, labels_test).item()

            if PRINTS:
                print()
                print('test_loss:', loss_test[slot])
                print('test_accuracy:', accuracy_test[slot])
                print('train_loss:', loss_train[slot])

    if PLOTS:
        # Imported lazily so matplotlib is only required when plotting.
        import matplotlib.pyplot as plt

        fig, ax = plt.subplots(1, 2, figsize=(10, 5))
        fig.suptitle('Training curves for Pytorch MLP')

        ax[0].set_title('Loss')
        ax[0].set_ylabel('Loss value')
        ax[0].set_xlabel('No of batches seen x{}'.format(FLAGS.eval_freq))
        ax[0].plot(loss_train, label='Train')
        ax[0].plot(loss_test, label='Test')
        ax[0].legend()

        ax[1].set_title('Accuracy')
        ax[1].set_ylabel('Accuracy value')
        ax[1].set_xlabel('No of batches seen x{}'.format(FLAGS.eval_freq))
        ax[1].plot(accuracy_test, label='Test')
        ax[1].legend()
        plt.show()
示例#27
0
def train():
    """
    Train an MLP on CIFAR-10 with Adam and plot loss / test accuracy.

    Settings come from the module-level ``FLAGS`` object
    (``dnn_hidden_units``, ``learning_rate``, ``max_steps``, ``batch_size``,
    ``eval_freq``). The data is loaded from the fixed path
    ``cifar10/cifar-10-batches-py``. The training loss is printed and recorded
    at every step; every ``eval_freq`` steps the accuracy on the whole test
    set is printed and recorded. Two matplotlib figures are shown at the end.

    Returns:
        None.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Parse the comma-separated layer sizes, e.g. "100,100" -> [100, 100].
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    learning_rate = FLAGS.learning_rate
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    eval_freq = FLAGS.eval_freq

    # Use the GPU when one is present, otherwise fall back to the CPU so the
    # script still runs on machines without CUDA.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    n_inputs = 32 * 32 * 3

    cifar10 = cifar10_utils.get_cifar10('cifar10/cifar-10-batches-py')

    mlp = MLP(n_inputs, dnn_hidden_units, 10).to(device)

    # opt = optim.SGD(mlp.parameters(), lr = learning_rate)
    opt = optim.Adam(mlp.parameters(), lr=learning_rate)
    loss_function = nn.CrossEntropyLoss()

    # The test set is fixed, so move it to the device once instead of
    # rebuilding the tensors at every evaluation.
    test_x = cifar10['test'].images
    test_x_tensor = torch.from_numpy(
        np.reshape(test_x, [test_x.shape[0], n_inputs])).to(device)
    test_y_tensor = torch.from_numpy(cifar10['test'].labels).to(device)

    train_losses = []
    accuracies = []
    steps = []

    for step in range(max_steps):
        x, y = cifar10['train'].next_batch(batch_size)
        x_tensor = torch.from_numpy(np.reshape(
            x, [x.shape[0], n_inputs])).to(device)
        y_tensor = torch.from_numpy(y).to(device)

        out = mlp(x_tensor)
        # torch.max(..., 1)[1] turns the one-hot rows into class indices.
        loss = loss_function(out, torch.max(y_tensor, 1)[1])

        opt.zero_grad()
        loss.backward()
        opt.step()

        # Record a plain Python float: keeping the loss tensor itself would
        # hold on to autograd state and cannot be plotted from the GPU.
        step_loss = loss.item()
        train_losses.append(step_loss)
        print('Step: {} Loss: {:.4f}'.format(step + 1, step_loss))

        if (step + 1) % eval_freq == 0:
            # Evaluation needs no gradients; this avoids building a graph
            # over the whole test set.
            with torch.no_grad():
                test_out = mlp(test_x_tensor)
            # float() accepts Python/NumPy scalars and 0-dim tensors alike,
            # so the stored values are always plottable.
            test_accuracy = float(accuracy(test_out, test_y_tensor))
            accuracies.append(test_accuracy)
            steps.append(step + 1)

            print('Step: {} Accuracy {:.2f}'.format(step + 1, test_accuracy))

    plt.plot(range(max_steps), train_losses)
    plt.xlabel("Step")
    plt.ylabel("Training loss")
    plt.show()

    plt.plot(steps, accuracies)
    plt.show()
def train():
    """
    Train an MLP on CIFAR-10 with Adam and write a CSV summary of the run.

    Settings come from the module-level ``FLAGS`` object
    (``dnn_hidden_units``, ``neg_slope``, ``learning_rate``, ``eval_freq``,
    ``max_steps``, ``batch_size``); the data is loaded from
    ``DATA_DIR_DEFAULT``. Training loss and accuracy are printed at every
    step. Every ``eval_freq`` steps, and on the final step, the loss and
    accuracy on the whole test set are computed and a row is appended to the
    summary. The summary is written to
    ``results/train_summary_torch_<unix time>.csv``.

    Returns:
        None.
    """
    import os

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Parse the comma-separated layer sizes, e.g. "100,100" -> [100, 100].
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [
            int(units) for units in FLAGS.dnn_hidden_units.split(",")
        ]
    else:
        dnn_hidden_units = []

    # Negative slope parameter for LeakyReLU
    neg_slope = FLAGS.neg_slope

    lr = FLAGS.learning_rate
    eval_freq = FLAGS.eval_freq
    max_steps = FLAGS.max_steps
    batch_size = FLAGS.batch_size
    input_size = 32 * 32 * 3
    output_size = 10

    # load dataset
    raw_data = cifar10_utils.get_cifar10(DATA_DIR_DEFAULT)
    train_data = raw_data['train']
    test_data = raw_data['test']

    model = MLP(n_inputs=input_size,
                n_hidden=dnn_hidden_units,
                n_classes=output_size,
                neg_slope=neg_slope)
    print(model.layers)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_target = nn.CrossEntropyLoss()
    csv_data = [[
        'step', 'train_loss', 'test_loss', 'train_accuracy', 'test_accuracy'
    ]]

    for step in range(max_steps):
        x, y = train_data.next_batch(batch_size)
        x = torch.tensor(x.reshape(batch_size, input_size),
                         dtype=torch.float32)
        y = torch.tensor(y, dtype=torch.long)

        output = model(x)
        # Labels arrive one-hot; CrossEntropyLoss wants class indices.
        loss = loss_target(output, y.argmax(dim=1))
        loss_avg = loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc = accuracy(output, y)
        # with the \r and end = '' trick, we can print on the same line
        print('\r[{}/{}] train_loss: {}  train_accuracy: {}'.format(
            step + 1, max_steps, round(loss_avg, 3), round(train_acc, 3)),
              end='')

        # Evaluate on the full test set at the chosen interval and at the end.
        if step % eval_freq == 0 or step >= (max_steps - 1):
            x, y = test_data.next_batch(test_data.num_examples)
            x = torch.tensor(x.reshape(test_data.num_examples, input_size),
                             dtype=torch.float32)
            y = torch.tensor(y, dtype=torch.long)
            # No gradients are needed here; skipping the autograd graph keeps
            # the full-test-set forward pass cheap in memory.
            with torch.no_grad():
                output = model(x)
                test_loss = loss_target(output, y.argmax(dim=1)).item()
            test_acc = accuracy(output, y)
            csv_data.append([step, loss_avg, test_loss, train_acc, test_acc])
            print(' test_loss: {}, test_accuracy: {}'.format(
                round(test_loss, 3), round(test_acc, 3)))

    # Make sure the output directory exists so a finished training run is not
    # lost to a FileNotFoundError at the very end.
    os.makedirs('results', exist_ok=True)
    # newline='' is what the csv module expects; without it extra blank rows
    # appear on platforms that translate line endings.
    with open('results/train_summary_torch_{}.csv'.format(int(time.time())),
              'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerows(csv_data)
示例#29
0
def train():
  """
  Train an MLP on CIFAR-10 with Adam (amsgrad) and print test accuracy.

  Settings come from the module-level ``FLAGS`` object (``dnn_hidden_units``,
  ``neg_slope``, ``learning_rate``, ``max_steps``, ``batch_size``,
  ``eval_freq``); the data is loaded from ``DATA_DIR_DEFAULT``. A batch of
  10000 test examples is drawn once and reused for every evaluation. The
  accuracy on it is printed every ``eval_freq`` steps and once more after
  training.

  Returns:
    None.
  """

  ### DO NOT CHANGE SEEDS!
  # Set the random seeds for reproducibility
  np.random.seed(42)

  # Parse the comma-separated layer sizes, e.g. "100,100" -> [100, 100].
  if FLAGS.dnn_hidden_units:
    dnn_hidden_units = [int(units) for units in FLAGS.dnn_hidden_units.split(",")]
  else:
    dnn_hidden_units = []

  # Negative slope parameter for LeakyReLU
  neg_slope = FLAGS.neg_slope

  cifar10 = cifar10_utils.get_cifar10(DATA_DIR_DEFAULT)
  # Draw one training example purely to discover the flattened input size and
  # the number of classes for the model constructor.
  x, y = cifar10['train'].next_batch(1)
  x_test, y_test = cifar10['test'].next_batch(10000)
  n_inputs = x.reshape(x.shape[0], -1).shape[1]
  n_classes = y.shape[1]

  x_test = torch.tensor(x_test.reshape(x_test.shape[0], -1))
  y_test = torch.tensor(y_test)

  model = MLP(n_inputs, dnn_hidden_units, n_classes, neg_slope)
  crossEntropy = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=FLAGS.learning_rate, amsgrad=True)

  # Mini-batch gradient descent.
  for i in range(FLAGS.max_steps):
    x, y = cifar10['train'].next_batch(FLAGS.batch_size)
    x = torch.tensor(x.reshape(x.shape[0], -1))
    y = torch.LongTensor(y)

    prediction = model(x)

    # torch.max(..., 1)[1] turns the one-hot rows into class indices.
    loss = crossEntropy(prediction, torch.max(y, 1)[1])
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if i % FLAGS.eval_freq == 0:
      # Evaluation needs no gradients; this avoids building an autograd graph
      # over the 10000 test examples.
      with torch.no_grad():
        # dim=1 (the class axis of the 2-D output) is what the implicit
        # choice resolved to; stating it silences the deprecation warning.
        prediction = nn.functional.softmax(model(x_test), dim=1)
      print('Accuracy after {} steps {}'.format(i, accuracy(prediction, y_test)))

  with torch.no_grad():
    prediction = model(x_test)
  print('Final accuracy')
  print(accuracy(prediction, y_test))
示例#30
0
def train(running_loss=0.0):
    """
    Train an MLP on CIFAR-10 with Adam and save sampled train/test metrics.

    Settings come from the module-level ``FLAGS`` object
    (``dnn_hidden_units``, ``data_dir``, ``batch_size``, ``max_steps``,
    ``learning_rate``, ``eval_freq``); tensors are created with the
    module-level ``dtype`` and placed on the module-level ``device``.

    Every ``eval_freq`` steps the loss and accuracy are estimated by
    averaging over randomly drawn mini-batches of 64 examples: 70 from the
    training set and 15 from the test set. The four resulting histories are
    logged and, after training, saved as ``.npy`` files under
    ``./mlp_results_pytorch/`` with a timestamp prefix.

    Args:
        running_loss: Initial value of the running training loss. It is
            overwritten with the loss of each training step and is not read
            anywhere in this function.

    Returns:
        None.
    """

    ### DO NOT CHANGE SEEDS!
    # Set the random seeds for reproducibility
    np.random.seed(42)

    # Parse the comma-separated layer sizes, e.g. "100,100" -> [100, 100].
    if FLAGS.dnn_hidden_units:
        dnn_hidden_units = [int(units) for units in FLAGS.dnn_hidden_units.split(",")]
    else:
        dnn_hidden_units = []

    # Set path to data
    data_dir = FLAGS.data_dir

    data = cifar10_utils.get_cifar10(data_dir)

    # Flattened size of one image, derived from the test images' 4-D shape.
    input_dims_test = data['test'].images.shape
    num_images_test = input_dims_test[0]
    image_dims_ravel = input_dims_test[1] * input_dims_test[2] * input_dims_test[3]

    # The test set is fixed, so it is converted and moved to the device once.
    X_test = data["test"].images.reshape((num_images_test, image_dims_ravel))
    Y_test = data["test"].labels
    X_test = torch.tensor(X_test, requires_grad=False).type(dtype).to(device)
    Y_test = torch.tensor(Y_test, requires_grad=False).type(dtype).to(device)

    model = MLP(n_inputs=image_dims_ravel, n_hidden=dnn_hidden_units, n_classes=10)
    # Place the model on the same device as the data tensors.
    model.to(device)

    accuracy_train_log = list()
    accuracy_test_log = list()
    loss_train_log = list()
    loss_test_log = list()

    batch_size = FLAGS.batch_size
    numb_iterations = FLAGS.max_steps
    learning_rate = FLAGS.learning_rate
    evaluation_freq = FLAGS.eval_freq
    # %g keeps the fractional part; an integer format would log a typical
    # learning rate as 0.
    logging.info("learning rate: %g ", learning_rate)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for step in range(numb_iterations):

        X_batch, Y_batch = data['train'].next_batch(batch_size)
        X_batch = X_batch.reshape((batch_size, image_dims_ravel))

        # Convert to tensors which are handled by the device
        X_batch = torch.from_numpy(X_batch).type(dtype).to(device)
        Y_batch = torch.from_numpy(Y_batch).type(dtype).to(device)

        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        # Labels arrive one-hot; CrossEntropyLoss wants class indices.
        targs = Y_batch.argmax(dim=1)
        outputs = model(X_batch)
        loss_current = criterion(outputs, targs)
        loss_current.backward()
        optimizer.step()

        running_loss = loss_current.detach().item()

        if step % evaluation_freq == 0:
            # Look the training arrays up only when evaluating, and convert
            # only the sampled rows: converting the entire training set on
            # every step is pure overhead.
            train_images = data['train'].images
            train_images = train_images.reshape((train_images.shape[0],
                                                 image_dims_ravel))
            train_labels = data['train'].labels

            # Evaluation needs no gradients.
            with torch.no_grad():
                # NOTE(review): only indices 1..4999 (train) and 1..999 (test)
                # are ever sampled, not the whole sets - confirm this is
                # intended.
                list_acc = list()
                list_loss = list()
                for _ in range(0, 70):
                    selection = random.sample(range(1, 5000), 64)
                    X_sel = torch.tensor(train_images[selection],
                                         requires_grad=False).type(dtype).to(device)
                    Y_sel = torch.tensor(train_labels[selection],
                                         requires_grad=False).type(dtype).to(device)
                    outputs_train = model(X_sel)
                    list_loss.append(
                        criterion(outputs_train, Y_sel.argmax(dim=1)).item())
                    list_acc.append(accuracy(outputs_train, Y_sel))
                loss_train_log.append(np.mean(list_loss))
                accuracy_train_log.append(np.mean(list_acc))
                logging.info("train performance: loss = %4f, accuracy = %4f ", loss_train_log[-1], accuracy_train_log[-1])

                list_acc = list()
                list_loss = list()
                for _ in range(0, 15):
                    selection = random.sample(range(1, 1000), 64)
                    Y_sel = Y_test[selection]
                    outputs_test = model(X_test[selection])
                    list_loss.append(
                        criterion(outputs_test, Y_sel.argmax(dim=1)).item())
                    list_acc.append(accuracy(outputs_test, Y_sel))
                loss_test_log.append(np.mean(list_loss))
                accuracy_test_log.append(np.mean(list_acc))
                logging.info("test performance: loss = %4f , accuracy = %4f\n", loss_test_log[-1], accuracy_test_log[-1])

    path = "./mlp_results_pytorch/"
    # Create the output directory so the results of a finished run are not
    # lost to a missing-directory error.
    os.makedirs(path, exist_ok=True)
    date_time = datetime.now().replace(second=0, microsecond=0).strftime(format="%Y-%m-%d-%H-%M")
    np.save(os.path.join(path, date_time + "accuracy_test"), accuracy_test_log)
    np.save(os.path.join(path, date_time + "loss_test"), loss_test_log)
    np.save(os.path.join(path, date_time + "loss_train"), loss_train_log)
    np.save(os.path.join(path, date_time + "accuracy_train"), accuracy_train_log)