Example #1
    def solve(self, costs_matrix, init_state=None):
        state = init_state if init_state is not None else greedy_solution(costs_matrix)
        current_energy = compute_cost(state, costs_matrix)
        best_state = state
        best_state_energy = current_energy
        T = self.init_T

        size = len(state)

        for _ in range(1, self.n_iter):
            move = get_random_move(size)
            candidate_state = self.state_gen(state, *move)
            candidate_energy = compute_cost(candidate_state, costs_matrix)

            if candidate_energy < current_energy:
                current_energy = candidate_energy
                state = candidate_state
                if current_energy < best_state_energy:
                    best_state = state
                    best_state_energy = current_energy
            else:
                p = transition_probability(candidate_energy - current_energy,
                                           T)
                if make_transition(p):
                    current_energy = candidate_energy
                    state = candidate_state

            T = T * self.cooling_factor
            if T <= self.end_T:
                break

        self.final_path = best_state
        self.final_cost = best_state_energy
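The acceptance step above relies on two helpers that are not shown in the snippet. Below is a minimal sketch of how they are commonly written for simulated annealing with the standard Metropolis criterion; the function names come from the calls above, but these bodies are an assumption, not the snippet's actual implementation.

import math
import random


def transition_probability(energy_delta, T):
    # Metropolis acceptance probability for a worse candidate (energy_delta > 0)
    return math.exp(-energy_delta / T)


def make_transition(p):
    # Accept the move with probability p
    return random.random() < p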
Example #2
    def solve(self, matrix):
        if self.init_state is None:
            self.init_state = greedy_solution(matrix)

        current_state = self.init_state
        current_cost = compute_cost(current_state, matrix)
        best_move = None

        self.final_path = current_state
        self.final_cost = current_cost

        tabus = []

        for _ in range(self.n_iter):
            neighbours = self.get_neighours(current_state)
            neighbours_costs = map(
                lambda state: compute_cost(state.path, matrix), neighbours)
            neighbours_with_cost = zip(neighbours_costs, neighbours)
            for cost, neighbour in sorted(neighbours_with_cost,
                                          key=lambda pair: pair[0]):
                if cost < current_cost:
                    if neighbour.move not in tabus or self.aspiration_criteria(
                            cost):
                        current_cost = cost
                        current_state = neighbour.path
                        best_move = neighbour.move

            if current_cost < self.final_cost:
                self.final_cost = current_cost
                self.final_path = current_state

            if best_move is not None:
                tabus.append(best_move)

            if len(tabus) > self.tabu_size:
                tabus.pop(0)
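The tabu search above expects self.get_neighours to return objects exposing .path and .move. A minimal sketch of such a neighbourhood generator under a 2-swap assumption follows; the Neighbour container and the swap move are illustrative assumptions, not the snippet's own code.

from collections import namedtuple

# Hypothetical container matching the .path / .move attributes used above
Neighbour = namedtuple("Neighbour", ["path", "move"])


def swap_neighbours(state):
    # All neighbours reachable by swapping two positions of the current path
    neighbours = []
    for i in range(len(state)):
        for j in range(i + 1, len(state)):
            path = list(state)
            path[i], path[j] = path[j], path[i]
            neighbours.append(Neighbour(path=path, move=(i, j)))
    return neighbours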
Example #3
    def L_layer_model(self,
                      X,
                      Y,
                      learning_rate=0.0075,
                      num_iterations=3000,
                      print_cost=False):
        np.random.seed(1)
        # keep track of cost
        costs = []
        steps = []
        # Parameters initialization.
        parameters = self.initialize_parameters()
        # Loop (gradient descent)
        grads_old = None
        for i in range(0, num_iterations):
            # Forward propagation: [LINEAR -> RELU]*(L-1) -> LINEAR -> SIGMOID.
            AL, forward_cache = self.L_model_forward(X, parameters)
            # Compute cost.
            cost = compute_cost(AL, Y)
            # Backward propagation.
            grads = self.L_model_backward(AL, Y, parameters, forward_cache)
            # Update parameters.
            parameters = self.update_parameters(parameters, grads,
                                                learning_rate, grads_old)
            grads_old = grads
            # Record and print the cost every 4 iterations
            if print_cost and i % 4 == 0:
                print("Cost after iteration %i: %f" % (i, cost))
                steps.append(i)
                costs.append(cost)
        self.result.append((steps, costs))
        return parameters
Example #4
def train(
    X: np.ndarray, y: np.ndarray, epoch: int = 100, learning_rate: float = 0.01
) -> dict:
    """
    Train a logistic regression model

    Parameters
    ----------
    X: [n,m] matrix of training examples
    y: [1,m] matrix of output labels/values
    epoch: number of iterations to perform
    learning_rate: step-size for gradient descent update

    Returns
    ---------
    a dictionary of learnt parameters (weights & biases)
    """

    params = init_parameters(X.shape[0])

    print(f"X.shape = {X.shape}, Y.shape = {Y.shape}")
    print(f"initial params: {params}")
    for i in range(epoch):
        A = forward_prop(X=X, params=params)  # forward prop to get prediction
        cost = compute_cost(Y=y, Y_hat=A)  # compute cost
        grads = compute_grads(X=X, Y=y, A=A, params=params)  # compute gradient
        params = update_parameters(
            params=params, grads=grads, learning_rate=learning_rate
        )  # update parameters using gradient descent
        if i % 100 == 0:
            print(f"epoch={i}\tcost={cost}")
    print(f"learnt params: {params}")

    return params
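The helpers called above (init_parameters, forward_prop, compute_cost, compute_grads, update_parameters) are not part of the snippet. Below is a minimal sketch of what they could look like for plain logistic regression, with signatures inferred from the calls above; treat it as an assumption rather than the original implementation.

import numpy as np


def init_parameters(n_x):
    # One weight per feature plus a scalar bias
    return {"W": np.zeros((1, n_x)), "b": 0.0}


def forward_prop(X, params):
    # Sigmoid of the linear score, shape [1, m]
    Z = params["W"] @ X + params["b"]
    return 1.0 / (1.0 + np.exp(-Z))


def compute_cost(Y, Y_hat):
    # Binary cross-entropy averaged over the m examples
    m = Y.shape[1]
    return -np.sum(Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat)) / m


def compute_grads(X, Y, A, params):
    m = X.shape[1]
    dZ = A - Y
    return {"dW": dZ @ X.T / m, "db": np.sum(dZ) / m}


def update_parameters(params, grads, learning_rate):
    return {"W": params["W"] - learning_rate * grads["dW"],
            "b": params["b"] - learning_rate * grads["db"]}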
Example #5
    def solve(self, costs):
        size = costs.shape[0]
        for path in itertools.permutations(range(1, size)):
            cost = compute_cost(path, costs)
            if cost < self.final_cost:
                self.final_cost = cost
                self.final_path = path

        self.final_path = list(self.final_path)
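compute_cost(path, costs) is shared by examples #1, #2 and #5 but never shown. One plausible sketch for a tour cost over a cost matrix, assuming the tour implicitly starts and ends at node 0 (which would explain why the permutations above only cover range(1, size)); the snippets may differ in such details, so this is purely illustrative.

def compute_cost(path, costs):
    # Cost of the closed tour 0 -> path[0] -> ... -> path[-1] -> 0
    cost = costs[0][path[0]]
    for a, b in zip(path, path[1:]):
        cost += costs[a][b]
    cost += costs[path[-1]][0]
    return cost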
Example #6
def execute(rnn, x, y, sequence_length):
    # Execute the model
    (chx, mhx, rv) = (None, None, None)
    output, (chx, mhx, rv), v = rnn(x, (None, mhx, None),
                                    reset_experience=True,
                                    pass_through_memory=True)

    # Keep only the final sequence_length steps and drop the last 3 channels
    y_out = sigm(output[:, -sequence_length:, :-3])
    y = y[:, :, :-3]

    return compute_cost(y_out, y, batch_size=1).item()
Example #7
def train(
    X: np.ndarray,
    y: np.ndarray,
    hidden_layer_dims: list = [3],
    epoch: int = 100,
    learning_rate: float = 0.01,
) -> dict:
    """
    Train a neural network

    Parameters
    ----------
    X: [n,m] matrix of training examples
    y: [1,m] matrix of output labels/values
    hidden_layer_dims: a list of hidden layer dimensions where each item denotes number of neurons in that layer
    epoch: number of iterations to perform
    learning_rate: step-size for gradient descent update

    Returns
    ---------
    a dictionary of learnt parameters (weights & biases)
    """

    n_x = X.shape[0]
    n_y = y.shape[0]

    layer_dims = [n_x] + hidden_layer_dims + [n_y]
    params = init_parameters(layer_dims=layer_dims)

    print(f"X.shape = {X.shape}, Y.shape = {Y.shape}")
    print("layer dims", layer_dims)
    print(f"initial params: {params}")
    for i in range(epoch):
        A, cache = forward_prop(
            X=X, params=params)  # forward prop to get prediction
        cost = compute_cost(Y=y, Y_hat=A)  # compute cost
        grads = compute_grads(X=X, Y=y, cache=cache,
                              params=params)  # compute gradient
        params = update_parameters(
            params=params, grads=grads, learning_rate=learning_rate
        )  # update parameters using gradient descent
        if i % 100 == 0:
            print(f"epoch={i}\tcost={cost}")
    print(f"learnt params: {params}")

    return params
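init_parameters(layer_dims=...) is also assumed here. A minimal sketch of a common initialization scheme (small random weights, zero biases) following the W1/b1, W2/b2, ... naming convention used throughout these examples; the 0.01 scaling factor is an assumption.

import numpy as np


def init_parameters(layer_dims):
    # W_l has shape (n_l, n_{l-1}); b_l has shape (n_l, 1)
    params = {}
    for l in range(1, len(layer_dims)):
        params["W" + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) * 0.01
        params["b" + str(l)] = np.zeros((layer_dims[l], 1))
    return params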
Example #8
def compute_cost_with_regularization(A3, Y, parameters, lambd):
    """
    损失函数中增加L2正则化
    """
    m = Y.shape[1]
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    W3 = parameters["W3"]

    # Compute the cross-entropy part of the cost
    cross_entropy_cost = compute_cost(A3, Y)

    # L2 regularization term
    L2_regularization_cost = (1. / m) * (lambd / 2) * \
                             (np.sum(np.square(W1)) + np.sum(np.square(W2)) + np.sum(np.square(W3)))

    cost = cross_entropy_cost + L2_regularization_cost

    return cost
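The cross-entropy term reused above comes from a compute_cost(A3, Y) that is not shown. A minimal sketch of the usual binary cross-entropy version, assuming A3 holds sigmoid outputs of shape [1, m]; this is an assumption about the unseen helper, not its verified source.

import numpy as np


def compute_cost(A3, Y):
    # Mean binary cross-entropy over the m examples
    m = Y.shape[1]
    logprobs = np.multiply(-np.log(A3), Y) + np.multiply(-np.log(1 - A3), 1 - Y)
    return 1. / m * np.nansum(logprobs)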
Example #9
plot_data(X, Y, xlabel, ylabel, legend)

# =============  Map data to Polynomial features ============== #

# this maps the features to a polynomial of degree 6
X = map_features(X[:, 0], X[:, 1])

# ============= Run logistic regression ================ #
initial_theta = np.zeros(X.shape[1])

# Regularization Parameter: This dictates how much the cost function is penalized
initial_lambda = 1

print('Computing cost with initial theta...')
cost = compute_cost(initial_theta, X, Y, regularized=True, lambda_=initial_lambda)
grad = compute_gradient(initial_theta, X, Y, regularized=True, lambda_=initial_lambda)

print(f'Cost with initial theta: {cost}')
print(f'First 5 gradients with initial theta\n{grad[:5].reshape(-1, 1)}')

test_theta = np.ones(X.shape[1])
test_lambda = 10

print('Computing cost with test theta...')
cost = compute_cost(test_theta, X, Y, regularized=True, lambda_=test_lambda)
grad = compute_gradient(test_theta, X, Y, regularized=True, lambda_=test_lambda)

print(f'Cost with test theta: {cost}')
print(f'First 5 gradients with test theta\n{grad[:5].reshape(-1, 1)}')
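map_features is referenced but not defined in this listing. A minimal sketch of the usual degree-6 polynomial feature mapping for two input columns, including an intercept column as the classic exercise does; the exact ordering of terms is an assumption.

import numpy as np


def map_features(x1, x2, degree=6):
    # Columns: 1, x1, x2, x1^2, x1*x2, x2^2, ..., x2^degree
    columns = [np.ones(x1.shape[0])]
    for i in range(1, degree + 1):
        for j in range(i + 1):
            columns.append((x1 ** (i - j)) * (x2 ** j))
    return np.stack(columns, axis=1)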
Example #10
def model(X, Y, learning_rate=0.3, num_iterations=30000, lambd=0, keep_prob=1):
    """
    使用三层网络,激活函数为:LINEAR->RELU->LINEAR->RELU->LINEAR->SIGMOID.
    第一个隐层:20个神经元
    第二个隐层:3个神经元
    输出层:1个神经元
    """

    grads = {}
    costs = []
    m = X.shape[1]
    layers_dims = [X.shape[0], 20, 3, 1]

    # Initialize network parameters
    parameters = initialize_parameters(layers_dims)

    # Gradient descent loop
    for i in range(0, num_iterations):

        # Forward propagation:
        # LINEAR -> RELU -> LINEAR -> RELU -> LINEAR -> SIGMOID.
        # If keep_prob == 1, run the standard forward pass
        # If keep_prob < 1, apply dropout during the forward pass
        if keep_prob == 1:
            a3, cache = forward_propagation(X, parameters)
        elif keep_prob < 1:
            a3, cache = forward_propagation_with_dropout(X, parameters, keep_prob)

        # Compute the cost
        # If lambd is non-zero, add L2 regularization to the cost
        if lambd == 0:
            cost = compute_cost(a3, Y)
        else:
            cost = compute_cost_with_regularization(a3, Y, parameters, lambd)

        # Only one of L2 regularization or dropout may be used at a time
        assert (lambd == 0 or keep_prob == 1)

        if lambd == 0 and keep_prob == 1:
            grads = backward_propagation(X, Y, cache)
        elif lambd != 0:
            grads = backward_propagation_with_regularization(X, Y, cache, lambd)
        elif keep_prob < 1:
            grads = backward_propagation_with_dropout(X, Y, cache, keep_prob)

        # Update parameters
        parameters = update_parameters(parameters, grads, learning_rate)

        # Print the cost every 10000 iterations
        if i % 10000 == 0:
            print("Cost after iteration {}: {}".format(i, cost))
            costs.append(cost)

    # Plot how the cost changes
    plt.plot(costs)
    plt.ylabel('cost')
    plt.xlabel('iterations (per 10000)')
    plt.title("Cost curve (learning rate = " + str(learning_rate) + ")")
    plt.show()

    return parameters
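A short hypothetical usage sketch of the model above; train_X and train_Y stand in for your own data arrays. The assert inside model() means you enable either L2 regularization or dropout on a given run, never both.

# L2 regularization only (keep_prob stays at its default of 1)
parameters_l2 = model(train_X, train_Y, lambd=0.7)

# Dropout only (lambd stays at its default of 0)
parameters_dropout = model(train_X, train_Y, keep_prob=0.86)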
Example #11
print('Remember to close the plot window; otherwise the process will not continue')
xlabel = 'Score: First Exam'
ylabel = 'Score: Second Exam'
legend = ['Admitted', 'Not admitted']

plot_data(X, Y, xlabel, ylabel, legend)

# ============= Part 2: Compute cost and gradient ============== #

print('Calculating cost and gradient...')

m, n = X.shape
X = np.concatenate((np.ones((m, 1)), X), axis=1)
initial_theta = np.zeros((n + 1, 1))
cost = compute_cost(initial_theta, X, Y)
grad = compute_gradient(initial_theta, X, Y)

print(f'Cost with initial parameters (all zeros): {cost}')
print(f'Gradients with initial parameters:\n{grad}')

test_theta = np.array([[-24], [0.2], [0.2]])
cost = compute_cost(test_theta, X, Y)
grad = compute_gradient(test_theta, X, Y)

print(f'Cost with test parameters:\n{test_theta}\nCost:{cost}')
print(f'Gradients with test parameters: \n{grad}')

input('Press enter to continue...')

# ================= Part 3: Optimizing ================== #
Example #12
def model(X_train,
          Y_train,
          X_test,
          Y_test,
          learning_rate=0.0001,
          num_epochs=1500,
          minibatch_size=32,
          print_cost=True):
    """
	Implements a three-layer tensorflow neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX.

	Arguments:
	X_train -- training set, of shape (input size = 784, number of training examples = 27455)
	Y_train -- training set, of shape (output size = 24, number of training examples = 27455)
	X_test -- test set, of shape (input size = 784, number of training examples = 7172)
	Y_test -- test set, of shape (output size = 24, number of test examples = 7172)
	learning_rate -- learning rate of the optimization
	num_epochs -- number of epochs of the optimization loop
	minibatch_size -- size of a minibatch
	print_cost -- True to print the cost every 100 epochs

	Returns:
	parameters -- parameters learnt by the model. They can then be used to predict.
	"""

    ops.reset_default_graph()  # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)  # to keep consistent results
    seed = 3  # to keep consistent results
    (n_x, m) = X_train.shape  # (n_x: input size, m: number of examples in the train set)
    n_y = Y_train.shape[0]  # n_y : output size
    costs = []  # To keep track of the cost

    # Create Placeholders of shape (n_x, n_y)
    X, Y = create_placeholders(n_x, n_y)

    # Initialize parameters
    parameters = initialize_parameters()

    # Forward propagation: Build the forward propagation in the tensorflow graph
    Z3 = forward_propagation(X, parameters)

    # Cost function: Add cost function to tensorflow graph
    cost = compute_cost(Z3, Y)

    # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer.
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)

    # Initialize all the variables
    init = tf.global_variables_initializer()

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:

        # Run the initialization
        sess.run(init)

        # Do the training loop
        for epoch in range(num_epochs):

            epoch_cost = 0.  # Defines a cost related to an epoch
            # number of minibatches of size minibatch_size in the train set
            num_minibatches = int(m / minibatch_size)
            seed = seed + 1
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size,
                                              seed)

            for minibatch in minibatches:

                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch

                # IMPORTANT: the line that runs the graph on a minibatch.
                # Run the session to execute "optimizer" and "cost"; the feed_dict should contain a minibatch for (X, Y).
                _, minibatch_cost = sess.run([optimizer, cost],
                                             feed_dict={
                                                 X: minibatch_X,
                                                 Y: minibatch_Y
                                             })

                epoch_cost += minibatch_cost / num_minibatches

            # Print the cost every epoch
            if print_cost == True and epoch % 100 == 0:
                print("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost == True and epoch % 5 == 0:
                costs.append(epoch_cost)

        # plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('epochs (per 5)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

        # lets save the parameters in a variable
        parameters = sess.run(parameters)
        print("Parameters have been trained!")

        # Calculate the correct predictions
        correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))

        # Calculate accuracy on the test set
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        print("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))

        return parameters
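In this TensorFlow 1.x example, compute_cost(Z3, Y) is conventionally the mean softmax cross-entropy over the logits. A minimal sketch under that assumption; since the tensors above use a (features, examples) layout, they are transposed first.

import tensorflow as tf


def compute_cost(Z3, Y):
    # tf.nn.softmax_cross_entropy_with_logits expects (examples, classes)
    logits = tf.transpose(Z3)
    labels = tf.transpose(Y)
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))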
Example #13
def model(X_train, Y_train, X_test, Y_test, learning_rate, num_epochs, minibatch_size, print_cost = True):

    tf.reset_default_graph()                         # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)                             # to keep consistent results
    seed = 3                                          # to keep consistent results
    
    (n_x, m) = X_train.shape                          # (n_x: input size, m : number of examples in the train set)
    n_y = Y_train.shape[0]                            # n_y : output size
    costs = []                                        # To keep track of the cost
    
    X, Y = create_placeholders(n_x, n_y)
    
    parameters = initialize_parameters()

    Z3 = forward_propagation(X, parameters)
    
    cost = compute_cost(Z3, Y)
    print(cost)
    
    # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer.
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    
    # Initialize all the variables
    init = tf.global_variables_initializer()

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:
        
        # Run the initialization
        sess.run(init)
        
        # Do the training loop
        for epoch in range(num_epochs):

            epoch_cost = 0.                       # Defines a cost related to an epoch
            num_minibatches = int(m / minibatch_size) # number of minibatches of size minibatch_size in the train set
            seed = seed + 1
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)

            for minibatch in minibatches:

                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch
                
                # IMPORTANT: the line that runs the graph on a minibatch.
                # Run the session to execute "optimizer" and "cost"; the feed_dict should contain a minibatch for (X, Y).
                _ , minibatch_cost = sess.run([optimizer, cost], 
                                             feed_dict={X: minibatch_X, 
                                                        Y: minibatch_Y})
                
                epoch_cost += minibatch_cost / num_minibatches

            # Print the cost every epoch
            if print_cost == True and epoch % 100 == 0:
                print ("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost == True and epoch % 5 == 0:
                costs.append(epoch_cost)
                
        # plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('epochs (per 5)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()
        
#        check=sess.run(tf.test.compute_gradient_error(X_train, X_train.shape, Y_train, Y_train.shape))
#        print(check)

        # lets save the parameters in a variable
        parameters = sess.run(parameters)
        print ("Parameters have been trained!")

        # Calculate the correct predictions
        correct_prediction = tf.equal(tf.argmax(Z3), tf.argmax(Y))
        
        # Calculate accuracy on the test set
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

        print ("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
        print("X_test -------", X_test.shape)
        print ("Test Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))
        
        return parameters
Example #14
def model(X, Y, optimizer, learning_rate=0.0007, mini_batch_size=64, beta=0.9,
          beta1=0.9, beta2=0.999, epsilon=1e-8, num_epochs=10000, print_cost=True):
    """
    模型逻辑
    定义一个三层网络(不包括输入层)
    第一个隐层:5个神经元
    第二个隐层:2个神经元
    输出层:1个神经元
    """
    # 计算网络的层数
    layers_dims = [train_X.shape[0], 5, 2, 1]

    L = len(layers_dims)
    costs = []
    t = 0
    seed = 10

    # Initialize network parameters
    parameters = initialize_parameters(layers_dims)

    # Initialize optimizer state
    if optimizer == "momentum":
        v = initialize_momentum(parameters)
    elif optimizer == "adam":
        v, s = initialize_adam(parameters)

    # Optimization loop
    for i in range(num_epochs):

        # Reshuffle the samples differently on every epoch
        seed = seed + 1
        # Build the mini-batches
        minibatches = random_mini_batches(X, Y, mini_batch_size, seed)

        for minibatch in minibatches:

            # Unpack one mini-batch
            (minibatch_X, minibatch_Y) = minibatch

            # Forward propagation on minibatch_X with parameters, returning a3 and caches
            a3, caches = forward_propagation(minibatch_X, parameters)

            # Compute the cost from a3 and minibatch_Y
            cost = compute_cost(a3, minibatch_Y)

            # Backward propagation, returning the gradients
            gradients = backward_propagation(minibatch_X, minibatch_Y, caches)

            # Update the parameters
            if optimizer == "momentum":

                parameters, v = update_parameters_with_momentum(parameters, gradients, v, beta, learning_rate)

            elif optimizer == "adam":
                t = t + 1
                parameters, v, s = update_parameters_with_adam(parameters, gradients, v, s,
                                                               t, learning_rate, beta1, beta2, epsilon)

        # Print the cost every 1000 epochs
        if print_cost and i % 1000 == 0:
            print("Cost after epoch %i: %f" % (i, cost))
        if print_cost and i % 100 == 0:
            costs.append(cost)

    # Plot how the cost changes
    plt.plot(costs)
    plt.ylabel('cost')
    plt.xlabel('epochs (per 100)')
    plt.title("Cost")
    plt.show()

    return parameters
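random_mini_batches(X, Y, mini_batch_size, seed) appears in examples #12 through #15 but is never shown. A minimal sketch of the usual shuffle-and-partition helper, assuming X has shape (features, m) and Y has shape (labels, m); treat the details as an assumption.

import math

import numpy as np


def random_mini_batches(X, Y, mini_batch_size=64, seed=0):
    # Shuffle the columns (examples) with a reproducible permutation
    np.random.seed(seed)
    m = X.shape[1]
    permutation = list(np.random.permutation(m))
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation]

    # Partition into batches of mini_batch_size, plus a smaller final batch if needed
    mini_batches = []
    num_complete = math.floor(m / mini_batch_size)
    for k in range(num_complete):
        start, end = k * mini_batch_size, (k + 1) * mini_batch_size
        mini_batches.append((shuffled_X[:, start:end], shuffled_Y[:, start:end]))
    if m % mini_batch_size != 0:
        start = num_complete * mini_batch_size
        mini_batches.append((shuffled_X[:, start:], shuffled_Y[:, start:]))
    return mini_batches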
Example #15
    def model(self,
              path_train_dataset,
              path_test_dataset,
              X_train_column,
              Y_train_column,
              X_test_column,
              Y_test_column,
              classes_list,
              optimizer_algo='adam',
              print_cost=True):

        # load the datasets
        X_train_orig, Y_train_orig, X_test_orig, Y_test_orig, classes = load_dataset(
            path_train_dataset, path_test_dataset, X_train_column,
            Y_train_column, X_test_column, Y_test_column, classes_list)

        # pre-processing
        X_train, Y_train, X_test, Y_test = flatten(X_train_orig, Y_train_orig,
                                                   X_test_orig, Y_test_orig,
                                                   classes)

        # to be able to rerun the model without overwriting tf variables
        ops.reset_default_graph()
        # (n_x: input size, m : number of examples in the train set)
        (n_x, m) = X_train.shape
        n_y = Y_train.shape[0]  # n_y : output size
        costs = []  # To keep track of the cost
        seed = 3  # seed for reshuffling the minibatches each epoch

        # Create Placeholders of shape (n_x, n_y)
        X, Y = create_placeholders(n_x, n_y)

        # Initialize parameters
        parameters = initialize_parameters(self.layers_list, seed=1)

        # Forward propagation: Build for-propagation in the tensorflow graph
        Z_final_layer = forward_propagation(X, parameters)

        # Cost function: Add cost function to tensorflow graph
        cost = compute_cost(Z_final_layer, Y)

        # Backpropagation: define the tensorflow optimizer
        if optimizer_algo == 'gradient_descent':
            optimizer = tf.train.GradientDescentOptimizer(
                learning_rate=self.learning_rate).minimize(cost)

        elif optimizer_algo == 'momentum':
            # MomentumOptimizer needs a momentum coefficient; 0.9 is assumed here
            optimizer = tf.train.MomentumOptimizer(
                learning_rate=self.learning_rate,
                momentum=0.9).minimize(cost)

        elif optimizer_algo == 'adam':
            optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate).minimize(cost)

        # Initialize all the variables
        init = tf.global_variables_initializer()

        # Start the session to compute the tensorflow graph
        with tf.Session() as sess:

            # Run the initialization
            sess.run(init)

            # Do the training loop
            for epoch in range(self.n_epochs):

                epoch_cost = 0.  # Defines a cost related to an epoch
                # number of minibatches of size minibatch_size in the train set
                num_minibatches = int(m / self.minibatch_size)
                seed = seed + 1
                minibatches = random_mini_batches(X_train, Y_train,
                                                  self.minibatch_size, seed)

                for minibatch in minibatches:

                    # Select a minibatch
                    (minibatch_X, minibatch_Y) = minibatch
                    _, minibatch_cost = sess.run([optimizer, cost],
                                                 feed_dict={
                                                     X: minibatch_X,
                                                     Y: minibatch_Y
                                                 })

                    epoch_cost += minibatch_cost / num_minibatches

                # Print the cost every epoch
                if print_cost == True and epoch % 100 == 0:
                    print("Cost after epoch %i: %f" % (epoch, epoch_cost))

                if print_cost == True and epoch % 5 == 0:
                    costs.append(epoch_cost)

            # lets save the parameters in a variable
            parameters = sess.run(parameters)
            print("Parameters have been trained!")

            # stores quantities useful for later
            quantities = {
                "X": X,
                "Y": Y,
                "Z_final_layer": Z_final_layer,
                "X_train": X_train,
                "Y_train": Y_train,
                "X_test": X_test,
                "Y_test": Y_test
            }

        return quantities, costs, parameters
Example #16
display_data(X[rand_indxs, :])

input('Press enter to continue...')

# ========== Test Logistic Regression ============ #

theta_t = np.array([-2, -1, 1, 2])

X_t = np.concatenate(
    [np.ones((5, 1)),
     np.arange(1, 16).reshape(5, 3, order='F') / 10], axis=1)
y_t = np.array([1, 0, 1, 0, 1]) >= 0.5
lambda_t = 3

cost = compute_cost(theta_t, X_t, y_t, regularized=True, lambda_=lambda_t)
grad = compute_gradient(theta_t, X_t, y_t, regularized=True, lambda_=lambda_t)

print(f'cost with test parameters: {cost}')
print(f'gradients with test parameters:\n{grad}')

input('Press enter to continue...')

# =============== Train One vs All =============== #

lambda_ = 0.1
m = OneVsAll(X, Y, num_labels, lambda_)
m.fit()

# =============== Predict One vs All =============== #