Пример #1
0
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    """Mini-batch SGD training loop with optional momentum and epoch shuffling.

    Args:
        model: SoftmaxModel trained in place (reads/writes .ws and .grads).
        datasets: [X_train, Y_train, X_val, Y_val, X_test, Y_test]; the test
            split is unpacked but never used inside this function.
        num_epochs: number of full passes over the training set.
        learning_rate: SGD step size (overridden below when use_momentum).
        batch_size: examples per gradient step; trailing partial batch dropped.
        use_shuffle: reshuffle X_train/Y_train in unison after each epoch.
        use_momentum: add momentum_gamma * previous gradients to each update.
        momentum_gamma: momentum coefficient.
    Returns:
        (model, train_loss, val_loss, train_accuracy, val_accuracy) where each
        dict maps global step -> metric, recorded 5 times per epoch.
    """
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # NOTE(review): this silently discards the caller's learning_rate whenever
    # momentum is enabled — confirm the hard-coded 0.02 override is intentional.
    if use_momentum:
        learning_rate = 0.02

    global_step = 0
    for epoch in range(num_epochs):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            # Gradients from the PREVIOUS backward pass, used as the momentum
            # term in the update below.
            # NOTE(review): this binds a reference, not a copy — it only holds
            # the previous gradients if model.backward() reassigns model.grads
            # rather than mutating the list/arrays in place; verify against
            # SoftmaxModel.backward. On the very first step these are whatever
            # the model was initialized with.
            prev_grads = model.grads

            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            for i in range(len(model.ws)):
                if use_momentum:
                    model.ws[i] = model.ws[i] - learning_rate  * (model.grads[i] + momentum_gamma * prev_grads[i])
                else:
                    model.ws[i] = model.ws[i] - learning_rate * model.grads[i]

            # Evaluate 5 times per epoch (every 20% of the training set).
            if (global_step % num_steps_per_val) == 0:
                _val_loss = cross_entropy_loss(Y_val, model.forward(X_val))
                val_loss[global_step] = _val_loss

                _train_loss = cross_entropy_loss(Y_train, model.forward(X_train))
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1
        # shuffle training examples after each epoch
        if use_shuffle:
            X_train, Y_train = unison_shuffled_copies(X_train, Y_train)
    return model, train_loss, val_loss, train_accuracy, val_accuracy
Пример #2
0
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    """Plain mini-batch SGD loop for a two-weight-matrix SoftmaxModel.

    Note: use_shuffle, use_momentum and momentum_gamma are accepted for
    interface compatibility but not used by this implementation.

    Returns:
        (model, train_loss, val_loss, train_accuracy, val_accuracy); each dict
        maps global step -> metric, recorded 5 times per epoch.
    """
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    batches_per_epoch = X_train.shape[0] // batch_size
    val_interval = batches_per_epoch // 5
    # Metric histories keyed by global step.
    train_loss, val_loss = {}, {}
    train_accuracy, val_accuracy = {}, {}

    global_step = 0
    for _epoch in range(num_epochs):
        for batch_idx in range(batches_per_epoch):
            lo = batch_idx * batch_size
            X_batch = X_train[lo:lo + batch_size]
            Y_batch = Y_train[lo:lo + batch_size]

            # Forward + backward on the current mini-batch.
            batch_outputs = model.forward(X_batch)
            model.backward(X_batch, batch_outputs, Y_batch)
            # Plain SGD step on the last two weight matrices (same order as
            # the original: output layer first, then the one before it).
            for layer in (-1, -2):
                model.ws[layer] = model.ws[layer] - learning_rate * model.grads[layer]

            # Record metrics every time we progress 20% through the dataset.
            if (global_step % val_interval) == 0:
                train_loss[global_step] = cross_entropy_loss(
                    Y_train, model.forward(X_train))
                val_loss[global_step] = cross_entropy_loss(
                    Y_val, model.forward(X_val))
                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
Пример #3
0
def train_and_evaluate(
        neurons_per_layer: typing.List[int],
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_improved_sigmoid: bool,
        use_improved_weight_init: bool,
        use_momentum: bool,
        momentum_gamma: float,
        use_shift=False):
    """Build a SoftmaxModel, train it, print final metrics, return histories.

    Args:
        neurons_per_layer: layer widths for the model (annotation corrected
            from `int` — SoftmaxModel is constructed from a list of widths).
        datasets: [X_train, Y_train, X_val, Y_val, X_test, Y_test].
        num_epochs / learning_rate / batch_size: training hyperparameters,
            forwarded to train().
        use_shuffle / use_momentum / momentum_gamma / use_shift: Task 3
            hyperparameters, forwarded to train().
        use_improved_sigmoid / use_improved_weight_init: model construction
            options.
    Returns:
        (train_loss, val_loss, train_accuracy, val_accuracy) history dicts.
    """
    # BUG FIX: unpack the splits locally — the final-metric prints below
    # referenced X_train/Y_train/... which were never defined in this scope
    # (they would have raised NameError or silently used module globals).
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                         use_improved_weight_init)
    model, train_loss, val_loss, train_accuracy, val_accuracy = train(
        model,
        datasets,
        num_epochs=num_epochs,
        learning_rate=learning_rate,
        batch_size=batch_size,
        use_shuffle=use_shuffle,
        use_momentum=use_momentum,
        momentum_gamma=momentum_gamma,
        use_shift=use_shift)

    print("----------", use_shuffle, use_improved_sigmoid,
          use_improved_weight_init, use_momentum, momentum_gamma, "----------")
    print("Final Train Cross Entropy Loss:",
          cross_entropy_loss(Y_train, model.forward(X_train)))
    print("Final Validation Cross Entropy Loss:",
          cross_entropy_loss(Y_val, model.forward(X_val)))
    print("Final Test Cross Entropy Loss:",
          cross_entropy_loss(Y_test, model.forward(X_test)))

    print("Final Train accuracy:", calculate_accuracy(X_train, Y_train, model))
    print("Final Validation accuracy:",
          calculate_accuracy(X_val, Y_val, model))
    print("Final Test accuracy:", calculate_accuracy(X_test, Y_test, model))
    return train_loss, val_loss, train_accuracy, val_accuracy
Пример #4
0
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: one-hot labels of each image, shape [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float): fraction of rows whose predicted class index
        matches the target class index.
    """
    predicted_idx = np.argmax(model.forward(X), axis=1)
    target_idx = np.argmax(targets, axis=1)
    return (target_idx == predicted_idx).mean()
Пример #5
0
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Predicted class = index of the largest output per row.
    # (Removed an unused `num_classes` local from the original.)
    predictions = np.argmax(model.forward(X), axis=1)
    # Fraction of rows whose predicted index matches the one-hot target index.
    accuracy = np.count_nonzero(
        predictions == np.argmax(targets, axis=1)) / X.shape[0]
    return accuracy
Пример #6
0
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: one-hot labels of each image, shape [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Run the forward pass, then score: a prediction is correct when the
    # index of its largest output equals the index of the target's 1.
    outputs = model.forward(X)
    pred_idx = np.argmax(outputs, axis=1)
    true_idx = np.argmax(targets, axis=1)
    return np.mean(pred_idx == true_idx)
Пример #7
0
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: one-hot labels of each image, shape [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    logits = model.forward(X)
    n = len(logits)
    # Turn each prediction into a one-hot row (1 at the argmax, 0 elsewhere)
    # and count the rows that match the one-hot targets exactly.
    predicted = np.zeros_like(logits)
    predicted[np.arange(n), np.argmax(logits, axis=1)] = 1
    return np.mean(np.all(predicted == targets, axis=1))
Пример #8
0
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Class index of the hot entry per target row / per prediction row.
    targets_indices = np.argmax(targets, axis=1)
    outputs_indices = np.argmax(model.forward(X), axis=1)
    result = np.equal(targets_indices, outputs_indices)
    # Mean of the boolean match vector = fraction of correct predictions.
    # (Removed a stray no-op `result.size` expression statement.)
    accuracy = result.sum() / result.size
    return accuracy
Пример #9
0
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: one-hot labels of each image, shape [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Compare the predicted class index against the true class index per row,
    # then divide the number of matches by the batch size.
    outputs = model.forward(X)
    matches = np.argmax(outputs, axis=1) == np.argmax(targets, axis=1)
    accuracy = np.count_nonzero(matches) / X.shape[0]
    return accuracy
Пример #10
0
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Vectorized: compare argmax indices directly instead of building a
    # Python list of 1/0 flags per example (the original also had a dead
    # `accuracy = 0.0` initializer).
    logits = model.forward(X)
    matches = np.argmax(logits, axis=1) == np.argmax(targets, axis=1)
    accuracy = np.sum(matches) / targets.shape[0]
    return accuracy
Пример #11
0
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: one-hot labels of each image, shape [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # TODO: Implement this function (copy from last assignment)
    probs = model.forward(X)
    # Number of rows where the highest-scoring class equals the labelled class.
    hits = np.sum(probs.argmax(axis=1) == targets.argmax(axis=1))
    return hits / probs.shape[0]
Пример #12
0
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: one-hot labels of each image, shape [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Forward pass to get per-class scores for each image.
    predictions = model.forward(X)
    # Reduce scores and one-hot targets to class indices, then count matches.
    pred_labels = np.argmax(predictions, axis=1)
    true_labels = np.argmax(targets, axis=1)
    n_correct = np.sum(pred_labels == true_labels)
    accuracy = n_correct / targets.shape[0]
    return accuracy
Пример #13
0
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    outputs = model.forward(X)
    max_outputs = np.argmax(outputs, axis=1)
    max_targets = np.argmax(targets, axis=1)

    # Count matches positively (the original counted mismatches via
    # `count_nonzero(max_outputs - max_targets)` and, worse, stored the
    # result in a local named `sum`, shadowing the builtin).
    num_correct = np.count_nonzero(max_outputs == max_targets)
    accuracy = num_correct / outputs.shape[0]
    return accuracy
Пример #14
0
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Compare class indices directly; the original round-tripped the argmax
    # through an external one_hot_encode() helper and multiplied with the
    # targets just to count rows whose hot entries coincide — equivalent, but
    # slower and with a needless dependency.
    output = model.forward(X)
    predicted = np.argmax(output, axis=1)
    expected = np.argmax(targets, axis=1)
    correct_pred = np.count_nonzero(predicted == expected)
    accuracy = correct_pred / output.shape[0]
    return accuracy
Пример #15
0
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # BUG FIX: the original rounded each output row with np.around and
    # required the rounded row to equal the one-hot target exactly. A correct
    # prediction whose maximum probability is below 0.5 rounds to an all-zero
    # row and was counted as wrong. Compare argmax indices instead.
    y_hat = model.forward(X)
    predicted = np.argmax(y_hat, axis=1)
    expected = np.argmax(targets, axis=1)
    number_of_rights = int(np.count_nonzero(predicted == expected))
    accuracy = number_of_rights / X.shape[0]
    return accuracy
Пример #16
0
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Vectorized replacement for the original per-example Python loop:
    # one argmax per axis and a single count instead of N iterations.
    predictions = model.forward(X)
    correct = np.count_nonzero(
        np.argmax(predictions, axis=1) == np.argmax(targets, axis=1))
    return correct / X.shape[0]
Пример #17
0
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Use argmax on the targets instead of the original per-row
    # `np.where(targets[i] == 1)[0][0]`, which raises IndexError on any row
    # that lacks an exact 1 (e.g. float one-hots with rounding noise), and
    # vectorize the per-example loop away.
    outputs = model.forward(X)
    N = targets.shape[0]
    correctOutputs = int(np.count_nonzero(
        np.argmax(outputs, axis=1) == np.argmax(targets, axis=1)))
    return correctOutputs / N
Пример #18
0
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: one-hot labels of each image, shape [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Forward pass, then reduce both predictions and targets to class
    # indices (fresh names instead of rebinding the parameters).
    outputs = model.forward(X)
    pred_idx = outputs.argmax(axis=1)
    true_idx = targets.argmax(axis=1)

    # Fraction of rows where the predicted index equals the target index.
    accuracy = np.count_nonzero(np.equal(true_idx, pred_idx)) / X.shape[0]

    return accuracy
Пример #19
0
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: one-hot labels of each image, shape [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    # Run the model once on the whole batch.
    outputs = model.forward(X)
    total = outputs.shape[0]
    # A row counts as correct when the target has a 1 at the predicted index.
    correct = sum(
        1 for i, output in enumerate(outputs)
        if targets[i][np.argmax(output)] == 1)
    return correct / total
Пример #20
0
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: labels/targets of each image of shape: [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    lgts = model.forward(X)

    lgts_max = np.argmax(lgts, axis=1)
    targets_max = np.argmax(targets, axis=1)

    # Vectorized: mean of the boolean match vector replaces the original
    # Python list comprehension of 1/0 flags (also removed the dead
    # `accuracy = 0` initializer).
    accuracy = np.mean(lgts_max == targets_max)
    return accuracy
Пример #21
0
def calculate_accuracy(X: np.ndarray, targets: np.ndarray, model: SoftmaxModel) -> float:
    """
    Args:
        X: images of shape [batch size, 785]
        targets: one-hot labels of each image, shape [batch size, 10]
        model: model of class SoftmaxModel
    Returns:
        Accuracy (float)
    """
    logits = model.forward(X)

    # snake_case locals and a vectorized count replace the original
    # camelCase per-row loop; the computed value is identical.
    total_predictions = logits.shape[0]
    predicted_classes = np.argmax(logits, axis=1)
    target_classes = np.argmax(targets, axis=1)
    correct_predictions = float(np.sum(predicted_classes == target_classes))

    accuracy = correct_predictions / total_predictions
    return accuracy
Пример #22
0
        model2,
        learning_rate,
        batch_size,
        shuffle_data,
        X_train,
        Y_train,
        X_val,
        Y_val,
    )
    train_history2, val_history2 = trainer2.train(num_epochs)

    print("model from 4e")
    print("Train accuracy:", calculate_accuracy(X_train, Y_train, model2))
    print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model2))
    print("Final Validation Cross Entropy Loss:",
          cross_entropy_loss(Y_val, model2.forward(X_val)))

    #Plotting training/validation - loss/accuracy comparing the two models:
    plt.figure(figsize=(20, 12))
    plt.subplot(1, 2, 1)
    plt.ylim([0., .9])
    utils.plot_loss(train_history2["loss"], "Train - 10 hidden layers")
    utils.plot_loss(train_history1["loss"], "Train - 2 hidden layers")
    utils.plot_loss(train_history["loss"], "Train - 1 hidden layer")
    utils.plot_loss(val_history2["loss"], "Validation - 10 hidden layers")
    utils.plot_loss(val_history1["loss"], "Validation - 2 hidden layers")
    utils.plot_loss(val_history["loss"], "Validation - 1 hidden layer")
    #similar legend as accuracy plot:
    plt.legend()
    plt.xlabel("Number of Training Steps")
    plt.ylabel("Training/Validation Loss")
Пример #23
0
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float):
    """Mini-batch SGD for a two-weight-matrix SoftmaxModel with patience-based
    early stopping on the validation loss.

    Args:
        model: SoftmaxModel with exactly two weight matrices (ws[0], ws[1]).
        datasets: [X_train, Y_train, X_val, Y_val, X_test, Y_test]; the test
            split is unpacked but never used here.
        num_epochs: maximum number of passes over the training data.
        learning_rate: SGD step size.
        batch_size: examples per gradient step; trailing partial batch dropped.
        use_shuffle: accepted but not used by this implementation.
        use_momentum: accepted but not used by this implementation.
        momentum_gamma: accepted but not used by this implementation.
    Returns:
        (model, train_loss, val_loss, train_accuracy, val_accuracy); the dicts
        map global step -> metric, recorded 5 times per epoch. Returns early
        (with best-so-far weights restored) if validation loss fails to improve
        30 evaluations in a row.
    """
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5
    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}
    # Early stop variables: snapshots of the best weights seen so far, a
    # patience counter, and the best validation loss achieved.
    early_stopped_weight_j = np.zeros(
        (model.ws[0].shape[0], model.ws[0].shape[1]))
    early_stopped_weight_k = np.zeros(
        (model.ws[1].shape[0], model.ws[1].shape[1]))
    early_stop_counter = 0
    best_loss = float("inf")

    global_step = 0
    for epoch in tqdm(range(num_epochs)):
        for step in range(num_batches_per_epoch):
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            outputs = model.forward(X_batch)
            model.backward(X_batch, outputs, Y_batch)
            # Update the weights (plain SGD; reassignment creates new arrays,
            # which keeps the early-stop snapshots below valid).
            model.ws[0] = model.ws[0] - learning_rate * model.grads[0]
            model.ws[1] = model.ws[1] - learning_rate * model.grads[1]

            # Track training loss continuously over the entire X_Train and not only the current batch
            #outputs_training = model.forward(X_train)
            #_train_loss = cross_entropy_loss(Y_batch, outputs)
            #train_loss[global_step] = _train_loss

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                # Test the validation data on the network
                outputs_validation = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, outputs_validation)
                val_loss[global_step] = _val_loss

                # Track training loss over the entire X_Train and not only the current batch
                # once every validation epoch
                outputs_training = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, outputs_training)
                train_loss[global_step] = _train_loss

                # Early stop implementation

                # If the loss does not reduce compared to best loss, increment counter
                # Otherwise, set the counter to 0 and update best loss
                if _val_loss >= best_loss:
                    early_stop_counter += 1
                else:
                    early_stop_counter = 0
                    best_loss = _val_loss
                    # NOTE: these store references, not copies — that is safe
                    # here only because the update step above REASSIGNS
                    # model.ws[i] each step instead of mutating it in place.
                    early_stopped_weight_j = model.ws[0]
                    early_stopped_weight_k = model.ws[1]
                # If 30 times in a row a new best loss was not achieved, stop the program
                if early_stop_counter == 30:
                    print(
                        "The cross entropy loss for validation data increased too much, thus triggering "
                        "the early stop at step : " + str(global_step) +
                        " and epoch : " + str(epoch))
                    model.ws[0] = early_stopped_weight_j
                    model.ws[1] = early_stopped_weight_k
                    return model, train_loss, val_loss, train_accuracy, val_accuracy

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1
    return model, train_loss, val_loss, train_accuracy, val_accuracy
Пример #24
0
    use_momentum = False

    model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                         use_improved_weight_init)

    model, train_loss, val_loss, train_accuracy, val_accuracy = train(
        model, [X_train, Y_train, X_val, Y_val, X_test, Y_test],
        num_epochs=num_epochs,
        learning_rate=learning_rate,
        batch_size=batch_size,
        use_shuffle=use_shuffle,
        use_momentum=use_momentum,
        momentum_gamma=momentum_gamma)

    print("Final Train Cross Entropy Loss:",
          cross_entropy_loss(Y_train, model.forward(X_train)))
    print("Final Validation Cross Entropy Loss:",
          cross_entropy_loss(Y_val, model.forward(X_val)))
    print("Final Test Cross Entropy Loss:",
          cross_entropy_loss(Y_test, model.forward(X_test)))

    print("Final Train accuracy:", calculate_accuracy(X_train, Y_train, model))
    print("Final Validation accuracy:",
          calculate_accuracy(X_val, Y_val, model))
    print("Final Test accuracy:", calculate_accuracy(X_test, Y_test, model))

    # Execution time calculation
    end = time.time()
    time_in_seconds = end - start
    if (time_in_seconds > 60):
        print("The process took: " + str(int(time_in_seconds / 60)) + "min " +
Пример #25
0
def calculate_accuracy(X: np.ndarray, targets: np.ndarray,
                       model: SoftmaxModel) -> float:
    """Fraction of rows where the model's argmax matches the one-hot target."""
    predicted = np.argmax(model.forward(X), axis=1)
    expected = np.argmax(targets, axis=1)
    return np.mean(expected == predicted)
Пример #26
0
        model,
        learning_rate,
        batch_size,
        shuffle_data,
        X_train,
        Y_train,
        X_val,
        Y_val,
    )
    train_history, val_history = trainer.train(num_epochs)

    print("32 neurons")
    print("Train accuracy:", calculate_accuracy(X_train, Y_train, model))
    print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model))
    print("Final Validation Cross Entropy Loss:",
          cross_entropy_loss(Y_val, model.forward(X_val)))

    ###Model with 128 neurons in hidden layer###
    neurons_per_layer = [128, 10]
    model1 = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                          use_improved_weight_init)
    trainer1 = SoftmaxTrainer(
        momentum_gamma,
        use_momentum,
        model1,
        learning_rate,
        batch_size,
        shuffle_data,
        X_train,
        Y_train,
        X_val,
Пример #27
0
    model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                         use_improved_weight_init)

    model, train_loss, val_loss, train_accuracy, val_accuracy = train(
        model, [X_train, Y_train, X_val, Y_val, X_test, Y_test],
        num_epochs=num_epochs,
        learning_rate=learning_rate,
        batch_size=batch_size,
        use_shuffle=use_shuffle,
        use_momentum=use_momentum,
        momentum_gamma=momentum_gamma,
        all_tricks=all_tricks)

    # Process the results in something readable
    print("Final Train Cross Entropy Loss:",
          cross_entropy_loss(Y_train, model.forward(X_train)))
    print("Final Validation Cross Entropy Loss:",
          cross_entropy_loss(Y_val, model.forward(X_val)))
    print("Final Test Cross Entropy Loss:",
          cross_entropy_loss(Y_test, model.forward(X_test)))

    print("Final Train accuracy:",
          calculate_accuracy(model.forward(X_train), Y_train, model))
    print("Final Validation accuracy:",
          calculate_accuracy(model.forward(X_val), Y_val, model))
    print("Final Test accuracy:",
          calculate_accuracy(model.forward(X_test), Y_test, model))

    title_tricks = str()
    if use_shuffle: title_tricks += "&shuffle"
    if use_improved_sigmoid: title_tricks += "&impr_sigmoid"
Пример #28
0
    # Simple test on one-hot encoding
    Y = np.zeros((1, 1), dtype=int)
    Y[0, 0] = 3
    Y = one_hot_encode(Y, 10)
    assert Y[0, 3] == 1 and Y.sum() == 1, \
        f"Expected the vector to be [0,0,0,1,0,0,0,0,0,0], but got {Y}"

    X_train, Y_train, *_ = utils.load_full_mnist(0.1)
    X_train = pre_process_images(X_train)
    Y_train = one_hot_encode(Y_train, 10)
    assert X_train.shape[1] == 785,\
        f"Expected X_train to have 785 elements per image. Shape was: {X_train.shape}"

    # Modify your network here
    neurons_per_layer = [64, 64, 10]
    use_improved_sigmoid = True
    use_improved_weight_init = True
    model = SoftmaxModel(neurons_per_layer, use_improved_sigmoid,
                         use_improved_weight_init)
    logits = model.forward(X_train)
    np.testing.assert_almost_equal(
        logits.mean(),
        1 / 10,
        err_msg=
        "Since the weights are all 0's, the softmax activation should be 1/10")

    # Gradient approximation check for 100 images
    X_train = X_train[:100]
    Y_train = Y_train[:100]
    gradient_approximation_test(model, X_train, Y_train)
Пример #29
0
    Y_val = one_hot_encode(Y_val, 10)

    print("Training standard model:\n")
    model = SoftmaxModel(
        neurons_per_layer,
        use_improved_sigmoid,
        use_improved_weight_init)
    trainer = SoftmaxTrainer(
        momentum_gamma, use_momentum,
        model, learning_rate, batch_size, shuffle_data,
        X_train, Y_train, X_val, Y_val,
    )
    train_history, val_history = trainer.train(num_epochs)

    print("Final Train Cross Entropy Loss:",
        cross_entropy_loss(Y_train, model.forward(X_train)))
    print("Final Validation Cross Entropy Loss:",
        cross_entropy_loss(Y_val, model.forward(X_val)))
    print("Train accuracy:", calculate_accuracy(X_train, Y_train, model))
    print("Validation accuracy:", calculate_accuracy(X_val, Y_val, model))
    print("\n\n")

    # Example created in assignment text - Comparing with and without shuffling.
    # YOU CAN DELETE EVERYTHING BELOW!

    # model with improved sigmoid
    use_improved_sigmoid = True

    print("Training model with improved sigmoid:\n")

    model_is = SoftmaxModel(
Пример #30
0
def train(
        model: SoftmaxModel,
        datasets: typing.List[np.ndarray],
        num_epochs: int,
        learning_rate: float,
        batch_size: int,
        # Task 3 hyperparameters,
        use_shuffle: bool,
        use_momentum: bool,
        momentum_gamma: float,
        use_shift=False):
    """Mini-batch SGD with optional momentum, per-epoch shuffling, pixel-shift
    augmentation, and two-strikes early stopping on the validation loss.

    Args:
        model: SoftmaxModel trained in place (its ws are updated every step).
        datasets: [X_train, Y_train, X_val, Y_val, X_test, Y_test]; the test
            split is unpacked but unused here.
        num_epochs: maximum passes over the training data.
        learning_rate: SGD step size.
        batch_size: examples per gradient step; trailing partial batch dropped.
        use_shuffle: reshuffle the training split (inputs and labels in
            unison) after every epoch.
        use_momentum: use a momentum_gamma-weighted running update term.
        momentum_gamma: momentum coefficient.
        use_shift: randomly roll the 784 pixel columns by -2..2 as data
            augmentation before each forward/backward pass.
    Returns:
        (model, train_loss, val_loss, train_accuracy, val_accuracy); the dicts
        map global step -> metric, recorded 5 times per epoch.
    """
    X_train, Y_train, X_val, Y_val, X_test, Y_test = datasets

    # Utility variables
    num_batches_per_epoch = X_train.shape[0] // batch_size
    num_steps_per_val = num_batches_per_epoch // 5

    # Tracking variables to track loss / accuracy
    train_loss = {}
    val_loss = {}
    train_accuracy = {}
    val_accuracy = {}

    # Variables for early stopping
    last_val_loss = 1
    best_val_loss = 1
    best_weights = None
    increased_last_time = False

    # Store last weights update term for momentum
    last_weights_update = [np.zeros_like(w) for w in model.ws]

    global_step = 0
    for epoch in range(num_epochs):
        print("Epoch:", epoch)
        for step in range(num_batches_per_epoch):
            shift = np.random.randint(low=-2, high=3, size=batch_size)
            start = step * batch_size
            end = start + batch_size
            X_batch, Y_batch = X_train[start:end], Y_train[start:end]

            X_local = X_batch
            if use_shift:
                # Roll the 784 pixel columns, then re-append the bias column.
                # NOTE(review): np.roll with an array `shift` and a scalar
                # axis applies the shifts cumulatively to the whole batch,
                # not one shift per row — confirm this matches the intended
                # per-image augmentation.
                X_local = np.roll(X_batch[:, :784], shift, axis=1)
                ones = np.ones((X_local.shape[0], 1))
                X_local = np.concatenate((X_local, ones), axis=1)

            # BUG FIX: the original computed X_local but still trained on
            # X_batch, so use_shift had no effect. Behavior is unchanged for
            # the default use_shift=False (X_local is X_batch).
            train_output = model.forward(X_local)

            model.backward(X_local, train_output, Y_batch)

            for l in range(len(model.ws)):
                if use_momentum:
                    update_term = momentum_gamma * last_weights_update[
                        l] - learning_rate * model.grads[l]
                    model.ws[l] += update_term
                    last_weights_update[l] = update_term
                else:
                    model.ws[l] -= learning_rate * model.grads[l]

            # Track train / validation loss / accuracy
            # every time we progress 20% through the dataset
            if (global_step % num_steps_per_val) == 0:
                val_output = model.forward(X_val)
                _val_loss = cross_entropy_loss(Y_val, val_output)
                val_loss[global_step] = _val_loss

                train_output = model.forward(X_train)
                _train_loss = cross_entropy_loss(Y_train, train_output)
                train_loss[global_step] = _train_loss

                train_accuracy[global_step] = calculate_accuracy(
                    X_train, Y_train, model)
                val_accuracy[global_step] = calculate_accuracy(
                    X_val, Y_val, model)

            global_step += 1

        # In order to keep labels in the right order, we shuffle an array of indices
        # and then apply this ordering to both inputs and labels
        if use_shuffle:
            indices = np.arange(X_train.shape[0])
            np.random.shuffle(indices)
            X_train = X_train[indices]
            Y_train = Y_train[indices]

        # Compute validation loss for early stopping
        val_outputs = model.forward(X_val)
        _val_loss = cross_entropy_loss(Y_val, val_outputs)
        if _val_loss <= best_val_loss:
            # BUG FIX: snapshot by copy. The update step mutates model.ws[l]
            # in place (`+=` / `-=`), so storing the list itself aliased the
            # live weights and early stopping "restored" the already
            # overwritten values.
            best_weights = [w.copy() for w in model.ws]
            best_val_loss = _val_loss
        # Stop once the validation loss has worsened two checks in a row.
        if _val_loss > last_val_loss:
            if increased_last_time:
                model.ws = best_weights
                break
            else:
                increased_last_time = True
        else:
            increased_last_time = False
        last_val_loss = _val_loss

    return model, train_loss, val_loss, train_accuracy, val_accuracy