Example #1
def train2(X,
           Y,
           nn_architecture,
           epochs,
           learning_rate,
           method,
           n_batches,
           batch_size,
           cost_type,
           N_nn,
           kernel_a,
           alpha_init,
           alpha_rate,
           beta,
           gamma,
           verbose,
           var_epsilon,
           dispersion_factor=6):

    if method == "sgd":
        N_nn = 1

    # initialization of neural net parameters
    params = [
        init_layers(nn_architecture, i, dispersion_factor) for i in range(N_nn)
    ]
    alpha = alpha_init

    # initialization of lists storing the history
    cost_history = []
    cost_history_mean = []
    accuracy_history = []
    elapsed_epochs = 0

    # performing calculations for subsequent iterations
    for i in range(epochs):

        for batch in range(n_batches):

            Y_hat = []
            costs = []
            cache = []
            grads = []

            start = batch * batch_size
            end = start + batch_size

            for j in range(N_nn):

                # step forward
                Y_hat_temp, cache_temp = full_forward_propagation(
                    X[:, start:end], params[j], nn_architecture)
                Y_hat.append(Y_hat_temp)
                cache.append(cache_temp)

                # calculating cost and saving it to history
                costj = get_cost_value(Y_hat[j], Y[:, start:end], cost_type)
                costs.append(costj)

                # step backward - calculating gradient
                if method in ["gradient", "gradient_old", "sgd"]:
                    gradsj = full_backward_propagation(Y_hat[j], Y[:,
                                                                   start:end],
                                                       cache[j], params[j],
                                                       nn_architecture)
                    grads.append(gradsj)

            if method == "gradient":
                params, var = update_nn_weights(params, grads, N_nn,
                                                learning_rate, kernel_a, alpha,
                                                beta, gamma)
            elif method == "gradient_old":
                params, var = update_nn_weights_old(params, grads, N_nn,
                                                    learning_rate, kernel_a,
                                                    alpha, beta, gamma)
            elif method == "nogradient":
                params, var = update_nn_weights_derivative_free(
                    params, costs, learning_rate, N_nn, kernel_a, alpha, beta,
                    gamma)
            elif method == "nogradient_old":
                params, var = update_nn_weights_derivative_free_old(
                    params, costs, learning_rate, N_nn, kernel_a, alpha, beta,
                    gamma)
            elif method == "sgd":
                params, var = update_sgd(params[0], grads[0], nn_architecture,
                                         learning_rate)
            else:
                raise Exception("No method found")

            #end of iteration
            cost_history.append(costs)

            #mean position
            mean_param = get_mean(params)
            Y_hat_mean, _ = full_forward_propagation(X[:,
                                                       start:end], mean_param,
                                                     nn_architecture)
            cost_mean = get_cost_value(Y_hat_mean, Y[:, start:end], cost_type)
            cost_history_mean.append(cost_mean)

        #end of epoch----------------
        var_mean = np.mean(
            var)  #mean of variances along dimensions of parameter space

        if (verbose):
            print(
                "Iteration: {:05} - mean cost: {:.5f} - particle variance: {:.5f}"
                .format(i, np.mean(costs), var_mean))

        alpha += alpha_rate
        elapsed_epochs += 1

        if var_mean < var_epsilon:
            print("Convergence achieved - Particles are localized")
            break

    if method != "sgd":
        plot_cost(cost_history, cost_history_mean, 'Training Cost Function')
        plot_list(np.mean(cost_history, axis=1), 'Mean Cost Function')
        plot_distance_matrix(params, N_nn)
    else:
        plot_list(cost_history, 'Training Cost Function')

    print("Cost Function evaluated {:01} times".format(
        int(n_batches * elapsed_epochs * N_nn)))

    return params, mean_param
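A minimal sketch of how train2 might be invoked. The layer-dictionary format of nn_architecture, the synthetic data, and the "mse" cost type are assumptions for illustration; the keyword names come from the signature of train2 above and "nogradient" is one of its method branches.

import numpy as np

# toy two-layer architecture (dict keys are assumed, following common numpy-NN layouts)
nn_architecture = [
    {"input_dim": 2, "output_dim": 16, "activation": "relu"},
    {"input_dim": 16, "output_dim": 1, "activation": "sigmoid"},
]

# synthetic data: columns are samples, rows are features / labels
X = np.random.randn(2, 1000)
Y = (X.sum(axis=0, keepdims=True) > 0).astype(float)

params, mean_param = train2(
    X, Y, nn_architecture,
    epochs=50, learning_rate=0.01, method="nogradient",
    n_batches=10, batch_size=100, cost_type="mse",
    N_nn=20, kernel_a=1.0, alpha_init=1.0, alpha_rate=0.01,
    beta=0.9, gamma=0.9, verbose=True, var_epsilon=1e-4)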
Example #2
def train_experimental(X,
                       Y,
                       nn_architecture,
                       epochs,
                       learning_rate,
                       method,
                       n_batches,
                       batch_size,
                       cost_type,
                       N_nn,
                       kernel_a,
                       alpha_init,
                       alpha_rate,
                       beta,
                       gamma,
                       verbose,
                       var_epsilon,
                       dispersion_factor=6):

    from optimizers import update_cloud_derivative_free
    from utils import flatten_weights, unflatten_weights

    if method == "sgd":
        N_nn = 1

    # initialization of neural net parameters
    params = [
        init_layers(nn_architecture, i, dispersion_factor) for i in range(N_nn)
    ]
    alpha = alpha_init

    # initialization of lists storing the history
    cost_history = []
    cost_history_mean = []
    accuracy_history = []

    elapsed_epochs = 0

    #find optimal kernel_a
    if kernel_a == "auto":
        print("Finding kernel constant...")
        paramsf, _, _ = flatten_weights(params, N_nn)
        params_diff_matrix = paramsf[:, np.newaxis] - paramsf
        norm = np.sum(params_diff_matrix**2, axis=2)
        for kernel_a in [
                0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10
        ]:
            if (np.mean(np.einsum('ij -> i', np.exp(-kernel_a * norm)) /
                        N_nn)) < 0.5:
                break

        print("Kernel constant found: " + str(kernel_a))

    if learning_rate == "auto":
        learning_rate = 1
        lr_decay = True
    else:
        lr_decay = False

    # performing calculations for subsequent iterations
    for i in range(epochs):

        for batch in range(n_batches):

            Y_hat = []
            costs = []
            cache = []
            grads = []

            start = batch * batch_size
            end = start + batch_size

            for j in range(N_nn):

                # step forward
                Y_hat_temp, cache_temp = full_forward_propagation(
                    X[:, start:end], params[j], nn_architecture)
                Y_hat.append(Y_hat_temp)
                cache.append(cache_temp)

                # calculating cost and saving it to history
                costj = get_cost_value(Y_hat[j], Y[:, start:end], cost_type)
                costs.append(costj)

            #get cloud (flattened weights), gradients, nn shape and weight names
            paramsf, nn_shape, weight_names = flatten_weights(params, N_nn)

            #get updated cloud and its variance
            paramsf, var = update_cloud_derivative_free(
                paramsf, costs, learning_rate, N_nn, kernel_a, alpha, beta,
                gamma)

            #restore NN weight shapes
            params = unflatten_weights(paramsf, nn_shape, weight_names, N_nn)

            if (lr_decay):
                if i == 0:
                    paramsf_previous = paramsf
                    gt = 0

                delta = paramsf_previous - paramsf
                gt = gt + np.absolute(delta)
                learning_rate = 1 / np.sqrt(1 + gt)

                paramsf_previous = paramsf
                #print(np.mean(learning_rate))

            #end of iteration
            cost_history.append(costs)

            #mean position
            mean_param = get_mean(params)
            Y_hat_mean, _ = full_forward_propagation(X[:,
                                                       start:end], mean_param,
                                                     nn_architecture)
            cost_mean = get_cost_value(Y_hat_mean, Y[:, start:end], cost_type)
            cost_history_mean.append(cost_mean)

        #end of epoch----------------
        var_mean = np.mean(
            var)  #mean of variances along dimensions of parameter space

        if (verbose):
            print(
                "Iteration: {:05} - mean cost: {:.5f} - particle variance: {:.5f}"
                .format(i, np.mean(costs), var_mean))

        alpha = alpha + alpha_rate
        elapsed_epochs += 1

        if var_mean < var_epsilon:
            print("Convergence achieved - Particles are localized")
            break

    if method != "sgd":
        plot_cost(cost_history, cost_history_mean, 'Training Cost Function')
        plot_list(np.mean(cost_history, axis=1), 'Mean Cost Function')
        plot_distance_matrix(params, N_nn)
    else:
        plot_list(cost_history, 'Training Cost Function')

    print("Cost Function evaluated {:01} times".format(
        int(n_batches * elapsed_epochs * N_nn)))

    return params, mean_param
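The learning_rate == "auto" branch of train_experimental implements an AdaGrad-like decay: the absolute parameter change is accumulated in gt and the per-coordinate step size shrinks as 1 / sqrt(1 + gt). A self-contained sketch of that schedule on a toy quadratic (the objective and values below are illustrative, not part of the original code):

import numpy as np

# toy objective f(w) = 0.5 * sum(a * w**2), so the gradient is a * w
a = np.array([0.3, 0.7])
w = np.array([3.0, -2.0])
w_prev = w.copy()
gt = np.zeros_like(w)              # accumulated absolute parameter change

for step in range(100):
    lr = 1.0 / np.sqrt(1.0 + gt)   # decayed per-coordinate learning rate
    w = w - lr * a * w             # gradient step on the toy quadratic
    gt += np.abs(w_prev - w)       # accumulate |delta|, as in train_experimental
    w_prev = w.copy()

print(w)                           # approaches the minimum at the origin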
def train_with_profiling(X,
                         Y,
                         nn_architecture,
                         epochs,
                         learning_rate,
                         method,
                         n_batches,
                         batch_size,
                         cost_type,
                         N,
                         kernel_a,
                         alpha_init,
                         alpha_rate,
                         beta,
                         gamma,
                         verbose,
                         var_epsilon,
                         dispersion_factor=6):

    import time
    from optimizers import update_nn_weights_profiled, update_nn_weights_derivative_free_profiled

    if method == "sgd":
        N = 1

    profiling_time = {
        "full_forward_propagation": 0.0,
        "get_cost_value": 0.0,
        "full_backward_propagation": 0.0,
        "weights_update": 0.0,
        "weights_update_without_flattening": 0.0
    }

    # initialization of neural net parameters
    params = [
        init_layers(nn_architecture, i, dispersion_factor) for i in range(N)
    ]
    alpha = alpha_init

    # initialization of lists storing the history
    cost_history = []
    cost_history_mean = []
    accuracy_history = []

    elapsed_epochs = 0

    # performing calculations for subsequent iterations
    for i in range(epochs):

        for batch in range(n_batches):

            Y_hat = []
            costs = []
            cache = []
            grads = []
            start = batch * batch_size
            end = start + batch_size

            for j in range(N):

                # step forward
                time_temp = time.process_time()
                Y_hat_temp, cache_temp = full_forward_propagation(
                    X[:, start:end], params[j], nn_architecture)
                profiling_time["full_forward_propagation"] += (
                    time.process_time() - time_temp)
                Y_hat.append(Y_hat_temp)
                cache.append(cache_temp)

                # calculating cost and saving it to history
                time_temp = time.process_time()
                costj = get_cost_value(Y_hat[j], Y[:, start:end], cost_type)
                profiling_time["get_cost_value"] += (time.process_time() -
                                                     time_temp)
                costs.append(costj)

                # step backward - calculating gradient
                if method in ["gradient", "sgd"]:
                    time_temp = time.process_time()
                    gradsj = full_backward_propagation(Y_hat[j], Y[:,
                                                                   start:end],
                                                       cache[j], params[j],
                                                       nn_architecture)
                    profiling_time["full_backward_propagation"] += (
                        time.process_time() - time_temp)
                    grads.append(gradsj)

            time_temp = time.process_time()
            if method == "gradient":
                params, var, cputime = update_nn_weights_profiled(
                    params, grads, N, learning_rate, kernel_a, alpha, beta,
                    gamma)
            elif method == "nogradient":
                params, var, cputime = update_nn_weights_derivative_free_profiled(
                    params, costs, learning_rate, N, kernel_a, alpha, beta,
                    gamma)
            elif method == "sgd":
                params, var = update_sgd(params[0], grads[0], nn_architecture,
                                         learning_rate)
            else:
                raise Exception("No method found")
            profiling_time["weights_update"] += (time.process_time() -
                                                 time_temp)

            if method in ["gradient", "nogradient"]:
                profiling_time["weights_update_without_flattening"] += cputime

            #end of iteration
            cost_history.append(costs)

            #mean position
            mean_param = get_mean(params)
            Y_hat_mean, _ = full_forward_propagation(X[:,
                                                       start:end], mean_param,
                                                     nn_architecture)
            cost_mean = get_cost_value(Y_hat_mean, Y[:, start:end], cost_type)
            cost_history_mean.append(cost_mean)

        #end of epoch----------------
        var_mean = np.mean(
            var)  #mean of variances along dimensions of parameter space

        if (verbose):
            print(
                "Iteration: {:05} - mean cost: {:.5f} - particle variance: {:.5f}"
                .format(i, np.mean(costs), var_mean))

        alpha = alpha + alpha_rate
        elapsed_epochs += 1

        if var_mean < var_epsilon:
            print("Convergence achieved - Particles are localized")
            break

    if method != "sgd":
        plot_cost(cost_history, cost_history_mean, 'Training Cost Function')
        plot_list(np.mean(cost_history, axis=1), 'Mean Cost Function')
        plot_distance_matrix(params, N)
    else:
        plot_list(cost_history, 'Training Cost Function')

    print("Cost Function evaluated {:01} times".format(
        int(n_batches * elapsed_epochs * N)))
    print("")
    print("CPU TIME --------------------------------------")
    for key, value in profiling_time.items():
        print(key, value)
    print("")

    return params, mean_param
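The profiling pattern in train_with_profiling (snapshot time.process_time(), run the section, add the difference to a named bucket) can be factored into a small helper. A minimal sketch with hypothetical bucket names:

import time

profiling_time = {"forward": 0.0, "cost": 0.0}

def timed(section, fn, *args, **kwargs):
    # run fn and charge its CPU time to the named bucket
    t0 = time.process_time()
    result = fn(*args, **kwargs)
    profiling_time[section] += time.process_time() - t0
    return result

# usage inside the batch loop, e.g.:
# Y_hat_temp, cache_temp = timed("forward", full_forward_propagation,
#                                X[:, start:end], params[j], nn_architecture)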
def train_nn(X, Y, cloud, nn_architecture, method, max_epochs, n_batches,
             batch_size, learning_rate, cost_type, N, kernel_a, alpha_init,
             alpha_rate, beta, gamma, verbose, var_epsilon):

    # initialization of lists storing the cost history
    cost_history = []
    cost_history_mean = []

    alpha = alpha_init
    elapsed_epochs = 0
    print("\nTraining started...")

    # performing calculations for subsequent iterations
    for i in range(max_epochs):

        for batch in range(n_batches):

            start = batch * batch_size
            end = start + batch_size

            Y_hat = []
            costs = []
            cache = []
            grads = []

            for j in range(N):

                # step forward
                Y_hat_temp, cache_temp = full_forward_propagation(
                    X[:, start:end], cloud[j], nn_architecture)
                Y_hat.append(Y_hat_temp)
                cache.append(cache_temp)

                # calculating cost and saving it to history
                costj = get_cost_value(Y_hat[j], Y[:, start:end], cost_type)
                costs.append(costj)
                # step backward - calculating gradient
                if method in ["gradient_descent", "swarm"]:
                    gradsj = full_backward_propagation(Y_hat[j], Y[:,
                                                                   start:end],
                                                       cache[j], cloud[j],
                                                       nn_architecture)
                    grads.append(gradsj)

            if method == "swarm":
                cloud, cloud_var = update_nn_weights(cloud, grads,
                                                     learning_rate, N,
                                                     kernel_a, alpha, beta,
                                                     gamma)
            elif method == "swarm_derivfree":
                cloud, cloud_var = update_nn_weights_derivative_free(
                    cloud, costs, learning_rate, N, kernel_a, alpha, beta,
                    gamma)
            elif method == "gradient_descent":
                cloud, cloud_var = update_gd(cloud[0], grads[0],
                                             nn_architecture, learning_rate)
            else:
                raise Exception("No method found")

            #end of iteration
            cost_history.append(costs)

            #mean particle position and its cost
            cloud_mean = get_mean(cloud)
            Y_hat_mean, _ = full_forward_propagation(X[:,
                                                       start:end], cloud_mean,
                                                     nn_architecture)
            cost_mean = get_cost_value(Y_hat_mean, Y[:, start:end], cost_type)
            cost_history_mean.append(cost_mean)

        #end of epoch----------------
        cloud_var = np.mean(
            cloud_var)  #mean of variances along dimensions of parameter space

        if (verbose):
            print(
                "Iteration: {:05} - Cloud mean cost: {:.5f} - Cloud variance: {:.5f}"
                .format(i, cost_mean, cloud_var))

        alpha += alpha_rate
        elapsed_epochs += 1

        if cloud_var < var_epsilon:
            print("Convergence achieved - particles are localized")
            break

    if i == (max_epochs - 1):
        print("Maximum number of epochs reached")

    print("\nFunction value at cloud mean: " + str(cost_mean))
    print("Cost function evaluated {:01} times".format(
        int(n_batches * elapsed_epochs * N)))

    return cloud, cloud_mean, cloud_var, cost_history, cost_history_mean
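A minimal sketch of how train_nn might be driven. The cloud is built with the same init_layers loop used in the earlier examples; the hyperparameter values and the "mse" cost type are illustrative assumptions, while "swarm_derivfree" is one of the method branches above.

N = 20                                                    # number of particles in the cloud
cloud = [init_layers(nn_architecture, seed, 6) for seed in range(N)]

cloud, cloud_mean, cloud_var, cost_history, cost_history_mean = train_nn(
    X, Y, cloud, nn_architecture,
    method="swarm_derivfree", max_epochs=100, n_batches=10, batch_size=100,
    learning_rate=0.05, cost_type="mse", N=N, kernel_a=1.0,
    alpha_init=1.0, alpha_rate=0.01, beta=0.9, gamma=0.9,
    verbose=True, var_epsilon=1e-4)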
def forward_propagation(self, X, cloud):
    Y, _ = full_forward_propagation(X.T, cloud, self.architecture)
    return Y.T
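Assuming forward_propagation belongs to a small wrapper class that stores the architecture (the enclosing class is not shown, so the NNModel name and constructor below are hypothetical), prediction with the trained cloud mean might look like:

class NNModel:
    # hypothetical wrapper; only forward_propagation comes from the source above
    def __init__(self, architecture):
        self.architecture = architecture

    def forward_propagation(self, X, cloud):
        Y, _ = full_forward_propagation(X.T, cloud, self.architecture)
        return Y.T

model = NNModel(nn_architecture)
# X_test shaped (n_samples, n_features); cloud_mean as returned by train_nn
Y_pred = model.forward_propagation(X_test, cloud_mean)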