import numpy as np
import tensorflow as tf

# MultiNN is the class under test; this import path is an assumption, adjust
# it to wherever MultiNN is defined in your project.
from multinn import MultiNN


def test_train_sigmoid():
    from tensorflow.keras.datasets import mnist
    np.random.seed(seed=1)
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    # Flatten images to 784-vectors and normalize pixels into [-0.5, 0.5]
    X_train = X_train.reshape(-1, 784).astype(np.float64) / 255.0 - 0.5
    y_train = y_train.flatten().astype(np.int32)
    input_dimension = X_train.shape[1]
    indices = list(range(X_train.shape[0]))
    # Shuffling is intentionally disabled so the expected values below stay deterministic.
    # np.random.shuffle(indices)
    number_of_samples_to_use = 500
    X_train = X_train[indices[:number_of_samples_to_use]]
    y_train = y_train[indices[:number_of_samples_to_use]]
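    # Build a 784-50-20-10 network: two sigmoid hidden layers and a linear
    # output layer whose logits feed the cross-entropy loss set below.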
    multi_nn = MultiNN(input_dimension)
    number_of_classes = 10
    activations_list = [multi_nn.sigmoid, multi_nn.sigmoid, multi_nn.linear]
    number_of_neurons_list = [50, 20, number_of_classes]
    for layer_number in range(len(activations_list)):
        multi_nn.add_layer(number_of_neurons_list[layer_number],
                           activation_function=activations_list[layer_number])
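    # Deterministic re-initialization: small Gaussian weights (std 0.1) and
    # zero biases for every layer.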
    for layer_number in range(len(multi_nn.weights)):
        W = multi_nn.get_weights_without_biases(layer_number)
        # Re-seed every iteration so each layer draws from the same random stream.
        np.random.seed(seed=1)
        W = tf.Variable(np.random.randn(*W.shape) * 0.1, trainable=True)
        multi_nn.set_weights_without_biases(W, layer_number)
        b = multi_nn.get_biases(layer_number=layer_number)
        b = tf.Variable(np.zeros(b.shape), trainable=True)
        multi_nn.set_biases(b, layer_number)
    multi_nn.set_loss_function(multi_nn.cross_entropy_loss)
    percent_error = []
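    # Train in 10 rounds of 20 epochs each, recording the training percent
    # error after every round; it is expected to decay to zero.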
    for k in range(10):
        multi_nn.train(X_train,
                       y_train,
                       batch_size=100,
                       num_epochs=20,
                       alpha=0.8)
        percent_error.append(multi_nn.calculate_percent_error(
            X_train, y_train))
    confusion_matrix = multi_nn.calculate_confusion_matrix(X_train, y_train)

    # Uncomment to regenerate the expected values below:
    # print(np.array2string(np.array(percent_error), separator=","))
    # print(np.array2string(np.array(confusion_matrix), separator=","))
    assert np.allclose(percent_error, np.array(
        [0.488, 0.208, 0.102, 0.02, 0.002, 0.0, 0.0, 0.0, 0.0, 0.0]), rtol=1e-3, atol=1e-3)
    assert np.allclose(confusion_matrix, np.array(
        [[50., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 66., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 52., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 50., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 52., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 39., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 45., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 52., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 39., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 55.]]), rtol=1e-3, atol=1e-3)


def test_train_relu():
    from tensorflow.keras.datasets import mnist
    np.random.seed(seed=1)
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    # Flatten images to 784-vectors and normalize pixels into [-0.5, 0.5]
    X_train = X_train.reshape(-1, 784).astype(np.float64) / 255.0 - 0.5
    y_train = y_train.flatten().astype(np.int32)
    X_test = X_test.reshape(-1, 784).astype(np.float64) / 255.0 - 0.5
    y_test = y_test.flatten().astype(np.int32)
    input_dimension = X_train.shape[1]
    indices = list(range(X_train.shape[0]))
    # Shuffling is intentionally disabled so the expected values below stay deterministic.
    # np.random.shuffle(indices)
    number_of_samples_to_use_for_training = 500
    number_of_samples_to_use_for_testing = 100
    X_train = X_train[indices[:number_of_samples_to_use_for_training]]
    y_train = y_train[indices[:number_of_samples_to_use_for_training]]
    X_test = X_test[indices[:number_of_samples_to_use_for_testing]]
    y_test = y_test[indices[:number_of_samples_to_use_for_testing]]
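    # Same pipeline as test_train_sigmoid, but the hidden layers use ReLU
    # (selected by name), the weight scale is larger (std 0.3), the learning
    # rate is lower (0.1), and a held-out split of 100 samples tracks test error.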
    multi_nn = MultiNN(input_dimension)
    number_of_classes = 10
    activations_list = ["Relu", "Relu", "Linear"]
    number_of_neurons_list = [50, 20, number_of_classes]
    for layer_number in range(len(activations_list)):
        multi_nn.add_layer(number_of_neurons_list[layer_number],
                           transfer_function=activations_list[layer_number])
    for layer_number in range(len(multi_nn.weights)):
        W = multi_nn.get_weights_without_biases(layer_number)
        W = tf.Variable(np.random.randn(*W.shape) * 0.3, trainable=True)
        multi_nn.set_weights_without_biases(W, layer_number)
        b = multi_nn.get_biases(layer_number=layer_number)
        b = tf.Variable(np.zeros(b.shape), trainable=True)
        multi_nn.set_biases(b, layer_number)
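    # Before any training, the randomly initialized network dumps most
    # predictions onto a handful of output units, so the confusion matrix
    # below is expected to be far from diagonal.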
    confusion_matrix = multi_nn.calculate_confusion_matrix(X_train, y_train)
    # print("************* Confusion Matrix with training data before training ***************\n", np.array2string(confusion_matrix, separator=","))
    assert np.allclose(confusion_matrix, np.array( \
        [[0., 0., 0., 0., 0., 0., 44., 3., 0., 3.],
         [0., 0., 0., 0., 1., 0., 62., 2., 0., 1.],
         [0., 0., 0., 0., 1., 0., 42., 3., 0., 6.],
         [0., 0., 0., 0., 0., 0., 47., 1., 0., 2.],
         [0., 0., 0., 0., 0., 0., 20., 16., 6., 10.],
         [0., 0., 0., 0., 1., 0., 30., 5., 1., 2.],
         [0., 0., 0., 0., 0., 0., 35., 4., 2., 4.],
         [0., 0., 0., 0., 0., 0., 25., 12., 13., 2.],
         [0., 0., 0., 0., 0., 0., 29., 1., 4., 5.],
         [0., 0., 0., 0., 1., 0., 43., 3., 8., 0.]]), rtol=1e-3, atol=1e-3)
    percent_error_with_training_data = []
    percent_error_with_test_data = []
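    # Train in 10 rounds of 20 epochs each, recording the percent error on
    # both the training split and the held-out test split after every round.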
    for k in range(10):
        multi_nn.train(X_train,
                       y_train,
                       batch_size=100,
                       num_epochs=20,
                       alpha=0.1)
        percent_error_with_training_data.append(
            multi_nn.calculate_percent_error(X_train, y_train))
        percent_error_with_test_data.append(
            multi_nn.calculate_percent_error(X_test, y_test))
    confusion_matrix = multi_nn.calculate_confusion_matrix(X_train, y_train)
    # print("************* Percent error using train ***************\n",np.array2string(np.array(percent_error_with_training_data), separator=","))
    # print("************* Confusion Matrix with training data ***************\n", np.array2string(confusion_matrix, separator=","))
    assert np.allclose(percent_error_with_training_data, np.array( \
        [0.324, 0.14, 0.084, 0.036, 0.022, 0.014, 0.012, 0.012, 0.012, 0.012]), rtol=1e-3, atol=1e-3)
    assert np.allclose(confusion_matrix, np.array(
        [[50., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 65., 1., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 52., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 1., 48., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 52., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 39., 0., 0., 0., 0.],
         [0., 0., 2., 0., 0., 0., 43., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 52., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 39., 0.],
         [0., 0., 0., 0., 0., 1., 0., 0., 0., 54.]]), rtol=1e-3, atol=1e-3)
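    # Generalization check: with only 500 training samples, the test error is
    # expected to plateau near 26% even as the training error falls to 1.2%.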
    confusion_matrix = multi_nn.calculate_confusion_matrix(X_test, y_test)
    # print("************* Percent error using test ***************\n",np.array2string(np.array(percent_error_with_test_data), separator=","))
    # print("************* Confusion Matrix with test data ***************\n", np.array2string(confusion_matrix, separator=","))
    assert np.allclose(percent_error_with_test_data, np.array( \
        [0.51, 0.36, 0.3, 0.28, 0.28, 0.27, 0.26, 0.26, 0.26, 0.26]), rtol=1e-3, atol=1e-3)
    assert np.allclose(confusion_matrix, np.array(
        [[7., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
         [0., 12., 0., 0., 0., 0., 0., 0., 0., 2.],
         [0., 0., 4., 1., 1., 1., 0., 1., 0., 0.],
         [0., 0., 0., 9., 0., 1., 0., 0., 0., 1.],
         [1., 0., 0., 0., 11., 0., 0., 1., 0., 1.],
         [0., 0., 0., 1., 2., 2., 0., 0., 1., 1.],
         [0., 0., 1., 0., 1., 0., 7., 1., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 12., 0., 2.],
         [0., 0., 0., 0., 0., 1., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0., 0., 1., 0., 9.]]), rtol=1e-3, atol=1e-3)