Example #1
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    
    if use_momentum:
        learning_rate = 0.02

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Gradients from the previous step (assumes backward() rebinds
            # model.grads rather than mutating it in place)
            prev_grads = model.grads

            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            for i in range(len(model.ws)):
                if use_momentum:
                    model.ws[i] = model.ws[i] - learning_rate * (model.grads[i] + momentum_gamma * prev_grads[i])
                else:
                    model.ws[i] = model.ws[i] - learning_rate * model.grads[i]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                _train_loss = cross_entropy_loss(Y_train, model.forward(X_train))
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1
        # shuffle training examples after each epoch
        if use_shuffle:
            X_train, Y_train = unison_shuffled_copies(X_train, Y_train)
    return model, train_loss, val_loss, train_accuracy, val_accuracy
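
`unison_shuffled_copies` is called above but not defined in this excerpt. A minimal sketch, assuming it returns new copies of both arrays shuffled with one shared permutation so that images and labels stay aligned:

import numpy as np

def unison_shuffled_copies(X: np.ndarray, Y: np.ndarray):
    # Draw a single permutation and apply it to both arrays.
    assert X.shape[0] == Y.shape[0]
    permutation = np.random.permutation(X.shape[0])
    return X[permutation], Y[permutation]
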
Example #2
def train(
    num_epochs: int,
    learning_rate: float,
    batch_size: int,
    l2_reg_lambda: float  # Task 3 hyperparameter. Can be ignored before this.
):
    """
        Function that implements logistic regression through mini-batch
        gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)
    if X_train.shape[1] == 784:
        X_train = pre_process_images(X_train)
    if X_test.shape[1] == 784:
        X_test = pre_process_images(X_test)
    if X_val.shape[1] == 784:
        X_val = pre_process_images(X_val)

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            y_hat = model.forward(X_batch)

            model.backward(X_batch, y_hat, Y_batch)
            model.w += -1 * learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, y_hat)
            train_loss[global_step] = _train_loss
            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
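
`pre_process_images` is only called above, never shown. Example #20 initializes the weights as `np.zeros((785, 1))` and Example #28 re-appends a column of ones after shifting, which suggests the 785th input column is a bias term. A minimal sketch under that assumption (any per-pixel normalization is omitted here):

import numpy as np

def pre_process_images(X: np.ndarray) -> np.ndarray:
    # Append a bias column of ones so the bias folds into the weight matrix
    # (784 pixel values -> 785 input features).
    bias = np.ones((X.shape[0], 1))
    return np.concatenate((X, bias), axis=1)
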
Example #3
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            # update weights
            model.ws[-1] = model.ws[-1] - learning_rate * model.grads[-1]
            model.ws[-2] = model.ws[-2] - learning_rate * model.grads[-2]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                _outputs_train = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, _outputs_train)
                train_loss[global_step] = _train_loss

                _outputs_val = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, _outputs_val)
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
Example #4
    def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
        """
        Perform forward, backward and gradient descent step here.
        The function is called once for every batch (see trainer.py) to perform the train step.
        The function returns the mean loss value which is then automatically logged in our variable self.train_history.

        Args:
            X: one batch of images
            Y: one batch of labels
        Returns:
            loss value (float) on batch
        """

        logits = self.model.forward(X_batch)
        self.model.backward(X_batch, logits, Y_batch)
        loss = cross_entropy_loss(Y_batch, logits)

        # updating weights
        if self.use_momentum:
            self.model.momentum_update_weights(self.learning_rate,
                                               self.momentum_grads)
            self.momentum_grads = [self.momentum_gamma * grad
                                   for grad in self.model.grads]
        else:
            self.model.update_weights(self.learning_rate)

        return loss
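
`update_weights` and `momentum_update_weights` are model methods that are not part of this excerpt. A minimal sketch of what they could look like, assuming `self.ws` and `self.grads` are parallel lists of per-layer arrays and that `momentum_grads` holds the gamma-scaled gradients from the previous step, as the caller above maintains them:

class SoftmaxModel:  # only the weight-update methods are sketched here
    def update_weights(self, learning_rate: float):
        # Plain gradient descent on every layer.
        for i, grad in enumerate(self.grads):
            self.ws[i] -= learning_rate * grad

    def momentum_update_weights(self, learning_rate: float, momentum_grads):
        # Gradient descent where the previous, gamma-scaled gradients are
        # added to the current ones before stepping.
        for i, grad in enumerate(self.grads):
            self.ws[i] -= learning_rate * (grad + momentum_grads[i])
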
Example #5
    def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
        """
        Perform forward, backward and gradient descent step here.
        The function is called once for every batch (see trainer.py) to perform the train step.
        The function returns the mean loss value which is then automatically logged in our variable self.train_history.

        Args:
            X: one batch of images
            Y: one batch of labels
        Returns:
            loss value (float) on batch
        """
        # TODO: Implement this function (task 2c)

        model = self.model

        logits = model.forward(X_batch)
        model.backward(X_batch, logits, Y_batch)

        if self.use_momentum:
            for i, grad in enumerate(model.grads):
                model.ws[i] -= self.learning_rate * self.previous_grads[i]
                self.previous_grads[i] = grad + self.momentum_gamma * self.previous_grads[i]
        else:
            for i, grad in enumerate(model.grads):
                model.ws[i] -= self.learning_rate * grad

        loss = cross_entropy_loss(Y_batch, logits)

        return loss
Example #6
    def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
        """
        Perform forward, backward and gradient descent step here.
        The function is called once for every batch (see trainer.py) to perform the train step.
        The function returns the mean loss value which is then automatically logged in our variable self.train_history.

        Args:
            X: one batch of images
            Y: one batch of labels
        Returns:
            loss value (float) on batch
        """
        # TODO: Implement this function (task 2c)
        # The momentum terms must persist between calls; keep them on the
        # trainer instance instead of resetting them to zero every step.
        if not hasattr(self, "dw_1"):
            self.dw_1, self.dw_2 = 0, 0

        logits = self.model.forward(X_batch)
        self.model.backward(X_batch, logits, Y_batch)

        if self.use_momentum:
            self.dw_1 = np.add(self.model.grads[0], self.momentum_gamma * self.dw_1)
            self.dw_2 = np.add(self.model.grads[1], self.momentum_gamma * self.dw_2)

            self.model.ws[0] = np.add(self.model.ws[0], -self.learning_rate * self.dw_1)
            self.model.ws[1] = np.add(self.model.ws[1], -self.learning_rate * self.dw_2)
        else:
            self.model.ws[0] = np.add(self.model.ws[0], -self.learning_rate * self.model.grads[0])
            self.model.ws[1] = np.add(self.model.ws[1], -self.learning_rate * self.model.grads[1])

        loss = cross_entropy_loss(Y_batch, logits)

        return loss
Example #7
    def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
        """
        Perform forward, backward and gradient descent step here.
        The function is called once for every batch (see trainer.py) to perform the train step.
        The function returns the mean loss value which is then automatically logged in our variable self.train_history.

        Args:
            X: one batch of images
            Y: one batch of labels
        Returns:
            loss value (float) on batch
        """
        # TODO: Implement this function (task 2b)
        # Forward step (retrieving the predictions)
        outputs = self.model.forward(X_batch)

        # Backward step
        self.model.backward(X_batch, outputs, Y_batch)

        # Updating the weights
        self.model.w -= self.model.grad * self.learning_rate

        # Computing the loss
        loss = cross_entropy_loss(Y_batch, outputs)

        return loss
Example #8
    def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
        """
        Perform forward, backward and gradient descent step here.
        The function is called once for every batch (see trainer.py) to perform the train step.
        The function returns the mean loss value which is then automatically logged in our variable self.train_history.

        Args:
            X: one batch of images
            Y: one batch of labels
        Returns:
            loss value (float) on batch
        """
        # TODO: Implement this function (task 2c)
        logits = self.model.forward(X_batch)
        self.model.backward(X_batch, logits, Y_batch)

        # Update weights
        for i in range(len(self.model.ws)):
            if self.use_momentum:
                self.model.ws[i] = self.model.ws[i] - self.learning_rate * (
                    self.model.grads[i] +
                    self.momentum_gamma * self.previous_grads[i])
                self.previous_grads[i] = (self.model.grads[i] +
                                          self.momentum_gamma * self.previous_grads[i])
            else:
                self.model.ws[i] = (self.model.ws[i] -
                                    self.learning_rate * self.model.grads[i])
        loss = cross_entropy_loss(Y_batch, logits)
        return loss
Example #9
    def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
        """
        Perform forward, backward and gradient descent step here.
        The function is called once for every batch (see trainer.py) to perform the train step.
        The function returns the mean loss value which is then automatically logged in our variable self.train_history.

        Args:
            X: one batch of images
            Y: one batch of labels
        Returns:
            loss value (float) on batch
        """
        # Perform forward pass to get outputs (predictions)
        Yhat_batch = self.model.forward(X_batch)

        # Perform backward pass to get gradient
        self.model.backward(X_batch, Yhat_batch, Y_batch)

        # Update weights in gradient step
        self.model.w = self.model.w - self.learning_rate * self.model.grad

        # Calculate cross entropy loss
        loss = cross_entropy_loss(Y_batch, Yhat_batch)

        return loss
Example #10
    def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
        """
        Perform forward, backward and gradient descent step here.
        The function is called once for every batch (see trainer.py) to perform the train step.
        The function returns the mean loss value which is then automatically logged in our variable self.train_history.

        Args:
            X: one batch of images
            Y: one batch of labels
        Returns:
            loss value (float) on batch
        """
        # TODO: Implement this function (task 2c)
        outputs = self.model.forward(X_batch)
        self.model.backward(X_batch, outputs, Y_batch)

        if self.use_momentum:
            for i in range(len(self.model.ws)):
                # Momentum implementation was a bit unclear in assignment, but from Piazza question @115 I assume this is correct
                self.momentum[i] = self.previous_grads[
                    i] + self.momentum_gamma * self.momentum[i]
                self.model.ws[i] = self.model.ws[
                    i] - self.learning_rate * self.momentum[i]
        else:
            for i in range(len(self.model.ws)):
                self.model.ws[i] = self.model.ws[
                    i] - self.learning_rate * self.model.grads[i]

        self.previous_grads = [np.copy(grad) for grad in self.model.grads]

        return cross_entropy_loss(Y_batch, outputs)
Example #11
    def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
        """
        Perform forward, backward and gradient descent step here.
        The function is called once for every batch (see trainer.py) to perform the train step.
        The function returns the mean loss value which is then automatically logged in our variable self.train_history.

        Args:
            X: one batch of images
            Y: one batch of labels
        Returns:
            loss value (float) on batch
        """
        # TODO: Implement this function (task 2c)
        output = self.model.forward(X_batch)
        self.model.backward(X_batch, output, Y_batch)

        # Without momentum, do a plain gradient step on each weight matrix;
        # otherwise do the momentum gradient step.
        if not self.use_momentum:
            for i, grad in enumerate(self.model.grads):
                self.model.ws[i] = self.model.ws[i] - self.learning_rate * grad
        else:
            for i, grad in enumerate(self.model.grads):
                self.vs[i] = grad + self.momentum_gamma * self.vs[i]
                self.model.ws[i] = self.model.ws[i] - self.learning_rate * self.vs[i]

        loss = cross_entropy_loss(Y_batch, output)
        return loss
Example #12
def train_and_evaluate(
        neurons_per_layer: typing.List[int],
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_improved_sigmoid: bool,
        use_improved_weight_init: bool,
        use_momentum: bool,
        momentum_gamma: float,
        use_shift=False):

    model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                         use_improved_weight_init)
    model, train_loss, val_loss, train_accuracy, val_accuracy = train(
        model,
        datasets,
        num_epochs=num_epochs,
        learning_rate=learning_rate,
        batch_size=batch_size,
        use_shuffle=use_shuffle,
        use_momentum=use_momentum,
        momentum_gamma=momentum_gamma,
        use_shift=use_shift)

    print("----------", use_shuffle, use_improved_sigmoid,
          use_improved_weight_init, use_momentum, momentum_gamma, "----------")
    print("Final Train Cross Entropy Loss:",
          cross_entropy_loss(Y_train, model.forward(X_train)))
    print("Final Validation Cross Entropy Loss:",
          cross_entropy_loss(Y_val, model.forward(X_val)))
    print("Final Test Cross Entropy Loss:",
          cross_entropy_loss(Y_test, model.forward(X_test)))

    print("Final Train accuracy:", calculate_accuracy(X_train, Y_train, model))
    print("Final Validation accuracy:",
          calculate_accuracy(X_val, Y_val, model))
    print("Final Test accuracy:", calculate_accuracy(X_test, Y_test, model))
    return train_loss, val_loss, train_accuracy, val_accuracy
Example #13
    def validation_step(self):
        """
        Perform a validation step to evaluate the model at the current step for the validation set.
        Also calculates the current accuracy of the model on the train set.
        Returns:
            loss (float): cross entropy loss over the whole dataset
            accuracy_ (float): accuracy over the whole dataset
        Returns:
            loss value (float) on batch
        """
        # NO NEED TO CHANGE THIS FUNCTION
        logits = self.model.forward(self.X_val)
        loss = cross_entropy_loss(Y_val, logits)

        accuracy_train = calculate_accuracy(X_train, Y_train, self.model)
        accuracy_val = calculate_accuracy(X_val, Y_val, self.model)
        return loss, accuracy_train, accuracy_val
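
The `train_step` / `validation_step` methods in these excerpts are driven by a training loop in trainer.py that is not included here. A minimal sketch of such a loop, assuming the same mini-batching and validate-every-20%-of-an-epoch scheme as the standalone train() functions above (the history dictionaries and attribute names are assumptions):

    def train(self, num_epochs: int):
        num_batches_per_epoch = self.X_train.shape[0] // self.batch_size
        num_steps_per_val = num_batches_per_epoch // 5
        train_history = dict(loss={}, accuracy={})
        val_history = dict(loss={}, accuracy={})

        global_step = 0
        for epoch in range(num_epochs):
            for step in range(num_batches_per_epoch):
                start = step * self.batch_size
                end = start + self.batch_size
                X_batch = self.X_train[start:end]
                Y_batch = self.Y_train[start:end]

                # One forward/backward/update step; the returned mean loss is logged.
                train_history["loss"][global_step] = self.train_step(X_batch, Y_batch)

                # Validate every time we progress 20% through the dataset.
                if global_step % num_steps_per_val == 0:
                    loss, accuracy_train, accuracy_val = self.validation_step()
                    val_history["loss"][global_step] = loss
                    train_history["accuracy"][global_step] = accuracy_train
                    val_history["accuracy"][global_step] = accuracy_val

                global_step += 1
        return train_history, val_history
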
Example #14
    def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
        """
        Perform forward, backward and gradient descent step here.
        The function is called once for every batch (see trainer.py) to perform the train step.
        The function returns the mean loss value which is then automatically logged in our variable self.train_history.

        Args:
            X: one batch of images
            Y: one batch of labels
        Returns:
            loss value (float) on batch
        """
        out = self.model.forward(X_batch)
        self.model.backward(X_batch, out, Y_batch)
        self.model.w -= self.model.grad * self.learning_rate
        loss = cross_entropy_loss(Y_batch, out)
        return loss
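
`cross_entropy_loss(targets, outputs)` is used in every example but never defined in these excerpts; the docstrings only say it returns the mean loss over the batch. A minimal sketch for the binary (logistic-regression) case, treating `outputs` as probabilities; the clipping constant is an assumption added to avoid log(0):

import numpy as np

def cross_entropy_loss(targets: np.ndarray, outputs: np.ndarray) -> float:
    # Mean binary cross entropy over the batch.
    eps = 1e-12
    outputs = np.clip(outputs, eps, 1 - eps)
    loss = -(targets * np.log(outputs) + (1 - targets) * np.log(1 - outputs))
    return float(loss.mean())
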
Example #15
    def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
        """
        Perform forward, backward and gradient descent step here.
        The function is called once for every batch (see trainer.py) to perform the train step.
        The function returns the mean loss value which is then automatically logged in our variable self.train_history.

        Args:
            X: one batch of images
            Y: one batch of labels
        Returns:
            loss value (float) on batch
        """
        # TODO: Implement this function (task 2c)
        logits = self.model.forward(X_batch)
        self.model.backward(X_batch, logits, Y_batch)

        # Gradient descent step; self.previous_grads is assumed to be
        # initialized to zeros by the trainer, as in the other examples above.
        for i, grad in enumerate(self.model.grads):
            if self.use_momentum:
                self.previous_grads[i] = grad + self.momentum_gamma * self.previous_grads[i]
                self.model.ws[i] = self.model.ws[i] - self.learning_rate * self.previous_grads[i]
            else:
                self.model.ws[i] = self.model.ws[i] - self.learning_rate * grad

        loss = cross_entropy_loss(Y_batch, logits)

        return loss
Example #16
    def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
        """
        Perform forward, backward and gradient descent step here.
        The function is called once for every batch (see trainer.py) to perform the train step.
        The function returns the mean loss value which is then automatically logged in our variable self.train_history.

        Args:
            X: one batch of images
            Y: one batch of labels
        Returns:
            loss value (float) on batch
        """
        # TODO: Implement this function (task 2b)

        y = self.model.forward(X_batch)
        self.model.backward(X_batch, y, Y_batch)  # compute the gradient
        delta_W = self.model.grad
        self.model.w = self.model.w - self.learning_rate * delta_W  #perform gradient descent step

        loss = cross_entropy_loss(Y_batch, y)
        return loss
Example #17
    def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
        """
        Perform forward, backward and gradient descent step here.
        The function is called once for every batch (see trainer.py) to perform the train step.
        The function returns the mean loss value which is then automatically logged in our variable self.train_history.

        Args:
            X: one batch of images
            Y: one batch of labels
        Returns:
            loss value (float) on batch
        """
        # Perform forward pass to get outputs (predictions)
        Yhat_batch = self.model.forward(X_batch)

        # Perform backward pass to get gradient
        self.model.backward(X_batch, Yhat_batch, Y_batch)

        if self.use_momentum:
            # update delta w (Formula 6)
            for layer_idx, grads in enumerate(self.model.grads):
                self.previous_grads[
                    layer_idx] = grads + self.momentum_gamma * self.previous_grads[
                        layer_idx]

            # Update weights in gradient step with momentum
            for layer_idx, previous_grads in enumerate(self.previous_grads):
                self.model.ws[layer_idx] = self.model.ws[
                    layer_idx] - self.learning_rate * previous_grads

        else:
            # Update weights in gradient step
            for layer_idx, grads in enumerate(self.model.grads):
                self.model.ws[layer_idx] = self.model.ws[
                    layer_idx] - self.learning_rate * grads

        # Calculate cross entropy loss
        loss = cross_entropy_loss(Y_batch, Yhat_batch)

        return loss
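
The momentum step implemented above (the "Formula 6" referred to in the comments) keeps an exponentially decaying sum of past gradients and steps along it:

$$\Delta w_t = \nabla_w C(w_t) + \gamma \, \Delta w_{t-1}, \qquad w_{t+1} = w_t - \alpha \, \Delta w_t$$

where $\alpha$ is the learning rate and $\gamma$ is `momentum_gamma`; `self.previous_grads` plays the role of $\Delta w_{t-1}$.
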
Example #18
    def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
        """
        Perform forward, backward and gradient descent step here.
        The function is called once for every batch (see trainer.py) to perform the train step.
        The function returns the mean loss value which is then automatically logged in our variable self.train_history.

        Args:
            X: one batch of images
            Y: one batch of labels
        Returns:
            loss value (float) on batch
        """
        logits = self.model.forward(X_batch)
        self.model.backward(X_batch, logits, Y_batch)
        for i, w in enumerate(self.model.ws):
            if self.use_momentum:
                w -= self.previous_grads[i] * self.learning_rate  # t
                self.previous_grads[i] = self.model.grads[i] + \
                    self.momentum_gamma*self.previous_grads[i]  # t+1
            else:
                w -= self.model.grads[i] * self.learning_rate
        loss = cross_entropy_loss(Y_batch, logits)
        return loss
Example #19
    def train_step(self, X_batch: np.ndarray, Y_batch: np.ndarray):
        """
        Perform forward, backward and gradient descent step here.
        The function is called once for every batch (see trainer.py) to perform the train step.
        The function returns the mean loss value which is then automatically logged in our variable self.train_history.

        Args:
            X: one batch of images
            Y: one batch of labels
        Returns:
            loss value (float) on batch
        """
        # TODO: Implement this function (task 2c)
        outputs = self.model.forward(X_batch)
        self.model.backward(X_batch, outputs, Y_batch)
        if self.use_momentum:
            # Accumulate the momentum term for each layer, then step along it.
            self.previous_grads[0] = (self.model.grads[0] +
                                      self.momentum_gamma * self.previous_grads[0])
            self.previous_grads[1] = (self.model.grads[1] +
                                      self.momentum_gamma * self.previous_grads[1])

            self.model.ws[0] = self.model.ws[0] - self.learning_rate * self.previous_grads[0]
            self.model.ws[1] = self.model.ws[1] - self.learning_rate * self.previous_grads[1]

        else:
            self.model.ws[0] = self.model.ws[
                0] - self.model.grads[0] * self.learning_rate
            self.model.ws[1] = self.model.ws[
                1] - self.model.grads[1] * self.learning_rate

        loss = cross_entropy_loss(Y_batch, outputs)
        return loss
Example #20
def train(
    num_epochs: int,
    learning_rate: float,
    batch_size: int,
    l2_reg_lambda: float  # Task 3 hyperparameter. Can be ignored before this.
):
    """
        Function that implements logistic regression through mini-batch
        gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda, X_train.shape[0])

    # initialize weights and outputs
    model.w = np.zeros((785, 1))

    # for early stopping (num_increases and with_stopping are module-level settings)
    is_val_loss_increasing = [False] * num_increases

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # forward and backward pass
            output = model.forward(X_batch)
            model.backward(X_batch, output, Y_batch)

            # update weights
            model.w = model.w - learning_rate * model.grad

            # Track training loss continuously
            output_train = model.forward(X_train)
            _train_loss = cross_entropy_loss(Y_train, output_train)
            train_loss[global_step] = _train_loss
            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                output_val = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, output_val)
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

                # early stopping
                stopping = False

                if with_stopping and global_step > 0:
                    stopping = early_stopping(num_increases,
                                              is_val_loss_increasing, val_loss,
                                              global_step, num_steps_per_val)

                if with_stopping and stopping:
                    break

            global_step += 1

        if with_stopping and stopping:
            print('Epoch =', epoch)
            break

    return model, train_loss, val_loss, train_accuracy, val_accuracy
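
The `early_stopping(...)` helper called above is not part of this excerpt; only its call signature is visible. A minimal sketch, under the assumption that it reports a stop once the validation loss has risen between consecutive validation steps `num_increases` times in a row:

def early_stopping(num_increases, is_val_loss_increasing, val_loss,
                   global_step, num_steps_per_val):
    previous_step = global_step - num_steps_per_val
    if previous_step not in val_loss:
        return False
    # Slide the window of "did the validation loss increase?" flags.
    is_val_loss_increasing.pop(0)
    is_val_loss_increasing.append(val_loss[global_step] > val_loss[previous_step])
    return sum(is_val_loss_increasing) >= num_increases
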
Example #21
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        use_early_stopping: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    momentum = [0 for i in range(len(model.grads))]

    #Variables used for early stopping
    mean_val_loss = []
    list_val_losses = []

    global_loss_counter = 2
    global_step = 0
    for epoch in range(num_epochs):
        # Shuffling before next epoch
        if use_shuffle:
            shuffle_in_unison(X_train, Y_train)
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            y_hat = model.forward(X_batch)
            model.backward(X_batch, y_hat, Y_batch)

            if use_momentum:
                momentum[0] = ((1 - momentum_gamma) * model.grads[0] +
                               momentum_gamma * momentum[0])
                momentum[1] = ((1 - momentum_gamma) * model.grads[1] +
                               momentum_gamma * momentum[1])
                model.ws[0] -= learning_rate * momentum[0]
                model.ws[1] -= learning_rate * momentum[1]
            else:
                model.ws[0] -= learning_rate * model.grads[0]
                model.ws[1] -= learning_rate * model.grads[1]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                _train_loss = cross_entropy_loss(Y_batch, y_hat)
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

                #Early stopping
                if use_early_stopping:
                    list_val_losses.append(_val_loss)
                    if global_loss_counter % 5 == 0:
                        mean_val_loss.append(np.mean(list_val_losses))
                        list_val_losses = []
                        if global_loss_counter % 10 == 0:
                            if mean_val_loss[0] < mean_val_loss[1]:
                                return model, train_loss, val_loss, train_accuracy, val_accuracy
                            mean_val_loss = []
                    global_loss_counter += 1

            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
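
`shuffle_in_unison` is called above without using a return value, so it presumably shuffles in place. A minimal sketch, assuming it applies one shared permutation to both arrays:

import numpy as np

def shuffle_in_unison(X: np.ndarray, Y: np.ndarray) -> None:
    # Shuffle rows of X and Y in place with the same permutation.
    permutation = np.random.permutation(X.shape[0])
    X[:] = X[permutation]
    Y[:] = Y[permutation]
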
Example #22
def train(num_epochs: int, learning_rate: float, batch_size: int,
          l2_reg_lambda: float):
    """
        Function that implements logistic regression through mini-batch
        gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)

    global_step = 0
    last_val_loss = 1
    best_val_loss = 1
    best_weights = None
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Forward pass
            train_outputs = model.forward(X_batch)

            # Backward propagation
            model.backward(X_batch, train_outputs, Y_batch)
            model.w -= learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, train_outputs)
            train_loss[global_step] = _train_loss

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                val_outputs = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, val_outputs)
                val_loss[global_step] = _val_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1

        # Compute validation loss for early stopping
        val_outputs = model.forward(X_val)
        _val_loss = cross_entropy_loss(Y_val, val_outputs)
        if _val_loss <= best_val_loss:
            # Copy so the later in-place weight updates don't overwrite this snapshot
            best_weights = model.w.copy()
            best_val_loss = _val_loss
        if _val_loss > last_val_loss:
            model.w = best_weights
            break
        last_val_loss = _val_loss
    return model, train_loss, val_loss, train_accuracy, val_accuracy
Example #23
X_train, Y_train, X_val, Y_val, X_test, Y_test = utils.load_binary_dataset(
    category1, category2, validation_percentage)

# hyperparameters
num_epochs = 50
learning_rate = 0.2
batch_size = 128
l2_reg_lambda = 0
model, train_loss, val_loss, train_accuracy, val_accuracy = train(
    num_epochs=num_epochs,
    learning_rate=learning_rate,
    batch_size=batch_size,
    l2_reg_lambda=l2_reg_lambda)

print("Final Train Cross Entropy Loss:",
      cross_entropy_loss(Y_train, model.forward(pre_process_images(X_train))))
print("Final  Test Entropy Loss:",
      cross_entropy_loss(Y_test, model.forward(pre_process_images(X_test))))
print("Final Validation Cross Entropy Loss:",
      cross_entropy_loss(Y_val, model.forward(pre_process_images(X_val))))

print("Train accuracy:", calculate_accuracy(X_train, Y_train, model))
print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model))
print("Test accuracy:", calculate_accuracy(X_test, Y_test, model))

# Plot loss
#plt.ylim([0., .4])
utils.plot_loss(train_loss, "Training Loss")
utils.plot_loss(val_loss, "Validation Loss")
plt.legend()
plt.savefig("binary_train_loss.png")
Example #24
def train(
    num_epochs: int,
    learning_rate: float,
    batch_size: int,
    l2_reg_lambda: float  # Task 3 hyperparameter. Can be ignored before this.
):
    """
        Function that implements logistic regression through mini-batch
        gradient descent for the given hyperparameters
    """
    global X_train, X_val, X_test, early_stopping_step
    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    model = BinaryModel(l2_reg_lambda)

    # Early stopping variable initialization
    last_loss = float("inf")
    already_failed = 0

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            # Select our mini-batch of images / labels
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # The mini-batch gradient descent algorithm for m batches and a single epoch.
            model.backward(X_batch, model.forward(X_batch), Y_batch)
            model.w = model.w - learning_rate * model.grad

            # Track training loss continuously
            _train_loss = cross_entropy_loss(Y_batch, model.forward(X_batch))
            train_loss[global_step] = _train_loss[0, 0]

            # Track validation loss / accuracy every time we progress 20% through the dataset
            if global_step % num_steps_per_val == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss[0, 0]

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

                # Early stopping criteria
                if (_val_loss[0, 0] > last_loss and already_failed > 20):
                    # Stop early
                    #print("Early stopping kicked in at epoch nr.:",epoch+1)
                    #return model, train_loss, val_loss, train_accuracy, val_accuracy
                    if early_stopping_step == 0:
                        early_stopping_step = global_step

                # Means failed this round
                elif (_val_loss[0, 0] > last_loss):
                    already_failed += 1

                # The loss improved this round, reset counter
                else:
                    last_loss = _val_loss[0, 0]
                    already_failed = 0

            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
Example #25
        model2,
        learning_rate,
        batch_size,
        shuffle_data,
        X_train,
        Y_train,
        X_val,
        Y_val,
    )
    train_history2, val_history2 = trainer2.train(num_epochs)

    print("model from 4e")
    print("Train accuracy:", calculate_accuracy(X_train, Y_train, model2))
    print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model2))
    print("Final Validation Cross Entropy Loss:",
          cross_entropy_loss(Y_val, model2.forward(X_val)))

    #Plotting training/validation loss/accuracy comparing the three models:
    plt.figure(figsize=(20, 12))
    plt.subplot(1, 2, 1)
    plt.ylim([0., .9])
    utils.plot_loss(train_history2["loss"], "Train - 10 hidden layers")
    utils.plot_loss(train_history1["loss"], "Train - 2 hidden layers")
    utils.plot_loss(train_history["loss"], "Train - 1 hidden layer")
    utils.plot_loss(val_history2["loss"], "Validation - 10 hidden layers")
    utils.plot_loss(val_history1["loss"], "Validation - 2 hidden layers")
    utils.plot_loss(val_history["loss"], "Validation - 1 hidden layer")
    #similar legend as accuracy plot:
    plt.legend()
    plt.xlabel("Number of Training Steps")
    plt.ylabel("Training/Validation Loss")
Example #26
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float,
        all_tricks=False):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Important hyper parameter setting
    if use_momentum:
        learning_rate = 0.02

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    # Early stop variables
    early_stopped_weight_j = np.zeros(
        (model.ws[0].shape[0], model.ws[0].shape[1]))
    early_stopped_weight_k = np.zeros(
        (model.ws[1].shape[0], model.ws[1].shape[1]))
    early_stop_counter = 0
    best_loss = float("inf")

    global_step = 0
    for epoch in tqdm(range(num_epochs)):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Compute the gradient
            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)

            # Update the weights, with or without the task 3d momentum gradient
            for layer in range(len(model.neurons_per_layer)):
                if use_momentum:
                    new_weights = model.ws[layer] - learning_rate * model.grads[
                        layer] + momentum_gamma * model.delta_w[layer]
                    model.delta_w[layer] = new_weights - model.ws[layer]
                    model.ws[layer] = new_weights
                else:
                    model.ws[layer] = model.ws[
                        layer] - learning_rate * model.grads[layer]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                # Test the validation data on the network
                outputs_validation = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, outputs_validation)
                val_loss[global_step] = _val_loss

                # Track training loss over the entire X_Train and not only the current batch
                # once every validation epoch
                outputs_training = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, outputs_training)
                train_loss[global_step] = _train_loss

                # Track the accuracy
                if not all_tricks:
                    train_accuracy[global_step] = calculate_accuracy(
                        X_train, Y_train, model)
                    val_accuracy[global_step] = calculate_accuracy(
                        X_val, Y_val, model)

                # Early stop implementation

                # If the loss does not reduce compared to best loss, increment counter
                # Otherwise, set the counter to 0 and update best loss
                if _val_loss >= best_loss:
                    early_stop_counter += 1
                else:
                    early_stop_counter = 0
                    best_loss = _val_loss
                    early_stopped_weight_j = model.ws[0]
                    early_stopped_weight_k = model.ws[1]
                # If 30 times in a row a new best loss was not achieved, stop the program
                if early_stop_counter == 30:
                    print(
                        "\nThe cross entropy loss for validation data increased too much, thus triggering "
                        "the early stop at step : " + str(global_step) +
                        " and epoch : " + str(epoch))
                    model.ws[0] = early_stopped_weight_j
                    model.ws[1] = early_stopped_weight_k
                    return model, train_loss, val_loss, train_accuracy, val_accuracy

            global_step += 1
        # Task 3a: Shuffle training samples after each epoch
        if use_shuffle:
            # Use the shuffle function from sklearn
            X_train, Y_train = shuffle(X_train, Y_train)

    return model, train_loss, val_loss, train_accuracy, val_accuracy
Example #27
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    # Early stop variables
    early_stopped_weight_j = np.zeros(
        (model.ws[0].shape[0], model.ws[0].shape[1]))
    early_stopped_weight_k = np.zeros(
        (model.ws[1].shape[0], model.ws[1].shape[1]))
    early_stop_counter = 0
    best_loss = float("inf")

    global_step = 0
    for epoch in tqdm(range(num_epochs)):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            # Update the weights
            model.ws[0] = model.ws[0] - learning_rate * model.grads[0]
            model.ws[1] = model.ws[1] - learning_rate * model.grads[1]


            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                # Test the validation data on the network
                outputs_validation = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, outputs_validation)
                val_loss[global_step] = _val_loss

                # Track training loss over the entire X_Train and not only the current batch
                # once every validation epoch
                outputs_training = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, outputs_training)
                train_loss[global_step] = _train_loss

                # Early stop implementation

                # If the loss does not reduce compared to best loss, increment counter
                # Otherwise, set the counter to 0 and update best loss
                if _val_loss >= best_loss:
                    early_stop_counter += 1
                else:
                    early_stop_counter = 0
                    best_loss = _val_loss
                    early_stopped_weight_j = model.ws[0]
                    early_stopped_weight_k = model.ws[1]
                # If 30 times in a row a new best loss was not achieved, stop the program
                if early_stop_counter == 30:
                    print(
                        "The cross entropy loss for validation data increased too much, thus triggering "
                        "the early stop at step : " + str(global_step) +
                        " and epoch : " + str(epoch))
                    model.ws[0] = early_stopped_weight_j
                    model.ws[1] = early_stopped_weight_k
                    return model, train_loss, val_loss, train_accuracy, val_accuracy

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
Example #28
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float,
        use_shift=False):
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5

    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    #Variables for early stopping
    last_val_loss = 1
    best_val_loss = 1
    best_weights = None
    increased_last_time = False

    # Store last weights update term for momentum
    last_weights_update = []
    for l in range(len(model.ws)):
        last_weights_update.append(np.zeros_like(model.ws[l]))

    global_step = 0
    for epoch in range(num_epochs):
        print("Epoch:", epoch)
        for step in range(num_batches_per_epoch):
            shift = np.random.randint(low=-2, high=3, size=batch_size)
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            X_local = X_batch
            if use_shift:
                X_local = np.roll(X_batch[:, :784], shift, axis=1)
                ones = np.ones((X_local.shape[0], 1))
                X_local = np.concatenate((X_local, ones), axis=1)

            train_output = model.forward(X_local)

            model.backward(X_local, train_output, Y_batch)

            for l in range(len(model.ws)):
                if use_momentum:
                    update_term = momentum_gamma * last_weights_update[
                        l] - learning_rate * model.grads[l]
                    model.ws[l] += update_term
                    last_weights_update[l] = update_term
                else:
                    model.ws[l] -= learning_rate * model.grads[l]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                val_output = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, val_output)
                val_loss[global_step] = _val_loss

                train_output = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, train_output)
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1

        # In order to keep labels in the right order, we shuffle an array of indices
        # and then apply this ordering to both inputs and labels
        if use_shuffle:
            indices = np.arange(X_train.shape[0])
            np.random.shuffle(indices)
            X_train = X_train[indices]
            Y_train = Y_train[indices]

        # Compute validation loss for early stopping
        val_outputs = model.forward(X_val)
        _val_loss = cross_entropy_loss(Y_val, val_outputs)
        if _val_loss <= best_val_loss:
            # Copy so the later in-place weight updates don't overwrite this snapshot
            best_weights = [w.copy() for w in model.ws]
            best_val_loss = _val_loss
        if _val_loss > last_val_loss:
            if increased_last_time:
                model.ws = best_weights
                break
            else:
                increased_last_time = True
        else:
            increased_last_time = False
        last_val_loss = _val_loss

    return model, train_loss, val_loss, train_accuracy, val_accuracy
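
A note on the shift augmentation above: `np.roll` with an array-valued shift and a single integer axis sums the shifts into one roll of the whole batch rather than rolling each row by its own amount. A minimal per-sample variant, assuming the intent is to shift each image independently by a few pixels and re-append the bias column:

import numpy as np

def shift_images(X_batch: np.ndarray, max_shift: int = 2) -> np.ndarray:
    # Roll each 784-pixel image by its own random shift, then re-append the bias column.
    shifts = np.random.randint(low=-max_shift, high=max_shift + 1,
                               size=X_batch.shape[0])
    shifted = np.empty_like(X_batch[:, :784])
    for row, shift in enumerate(shifts):
        shifted[row] = np.roll(X_batch[row, :784], shift)
    ones = np.ones((shifted.shape[0], 1))
    return np.concatenate((shifted, ones), axis=1)
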
Example #29
    # ANY PARTS OF THE CODE BELOW THIS CAN BE CHANGED.

    # Initialize model
    model = BinaryModel()
    # Train model
    trainer = LogisticTrainer(
        model, learning_rate, batch_size, shuffle_dataset,
        early_stopping, X_train, Y_train, X_val, Y_val,
    )
    train_history, val_history = trainer.train(num_epochs)

    # Plot and print everything you want of information

    print("Final Train Cross Entropy Loss:",
          cross_entropy_loss(Y_train, model.forward(X_train)))
    print("Final Validation Cross Entropy Loss:",
          cross_entropy_loss(Y_val, model.forward(X_val)))
    print("Train accuracy:", calculate_accuracy(X_train, Y_train, model))
    print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model))

    # Plot loss for first model (task 2b)
    plt.ylim([0., .2])
    utils.plot_loss(train_history["loss"],
                    "Training Loss", npoints_to_average=10)
    utils.plot_loss(val_history["loss"], "Validation Loss")
    plt.legend()
    plt.xlabel("Number of Training Steps")
    plt.ylabel("Cross Entropy Loss - Average")
    plt.savefig("task2b_binary_train_loss.png")
    plt.show()
Example #30
        learning_rate,
        batch_size,
        shuffle_data,
        X_train,
        Y_train,
        X_val,
        Y_val,
    )
    train_history_naked, val_history_naked = trainer_naked.train(num_epochs)

    print("just basic")
    print("Train accuracy:", calculate_accuracy(X_train, Y_train, model_naked))
    print("Validation accuracy:", calculate_accuracy(X_val, Y_val,
                                                     model_naked))
    print("Final Validation Cross Entropy Loss:",
          cross_entropy_loss(Y_val, model_naked.forward(X_val)))
    print("Final Train Cross Entropy Loss:",
          cross_entropy_loss(Y_train, model_naked.forward(X_train)))

    ###### 1st model - improved weights ######
    use_improved_sigmoid = False
    use_improved_weight_init = True
    use_momentum = False
    model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                         use_improved_weight_init)
    trainer = SoftmaxTrainer(
        momentum_gamma,
        use_momentum,
        model,
        learning_rate,
        batch_size,