def run_training():
    x_train, y_train, x_val, y_val, x_test, y_test = load_data_wrapper()

    learning_rate = 1e-1
    batch_size = 50
    max_epochs = 8

    mlp_model = MLP(input_dim=784,
                    output_dim=10,
                    hidden_dims=[30],
                    activation_functions=[sigmoid],
                    init_parameters_sd=1,
                    optimizer=SGD(learning_rate=learning_rate))
    print(mlp_model)

    train_model(mlp_model, x_train, y_train, lr=learning_rate,
                batch_size=batch_size, max_epochs=max_epochs,
                x_val=x_val, y_val=y_val, plot=True)
def run_training_and_evaluation():
    x_train, y_train, x_val, y_val, x_test, y_test = load_data_wrapper()

    hidden_dims = [100]
    activation_functions = [sigmoid]  # one activation per hidden layer, matching len(hidden_dims)
    init_parameters_sd = 1
    learning_rate = 2e-1
    batch_size = 50
    max_epochs = 20

    mlp_model = MLP(input_dim=784,
                    output_dim=10,
                    hidden_dims=hidden_dims,
                    activation_functions=activation_functions,
                    init_parameters_sd=init_parameters_sd,
                    optimizer=SGD(learning_rate=learning_rate))
    print(mlp_model)

    train_model(mlp_model, x_train, y_train,
                batch_size=batch_size, max_epochs=max_epochs,
                x_val=x_val, y_val=y_val, plot=True,
                early_stop=True, patience=2)

    file_name = f'mlp_model_{hidden_dims}_sd={init_parameters_sd}' + \
                f'_lr={learning_rate}_b={batch_size}_{datetime.now().strftime("%m-%d-%Y_%H.%M")}.pkl'
    mlp_model.save_model(file_name)

    evaluate_model(mlp_model, x_test, y_test)
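# train_model is called above with early_stop=True and patience=2, which
# suggests validation-loss-based early stopping. Its internals are not shown
# in this file; the following is only a minimal, self-contained sketch of
# that pattern (the function name and signature are illustrative, not the
# project's API):
def _early_stopping_sketch(validation_losses, patience=2):
    """Return the epoch index at which training would stop, or None."""
    best_loss = float('inf')
    epochs_without_improvement = 0
    for epoch, loss in enumerate(validation_losses):
        if loss < best_loss:
            best_loss = loss
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                return epoch
    return None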
def get_results_for_cnn(x_train: np.ndarray, x_val: np.ndarray,
                        y_train: np.ndarray, y_val: np.ndarray,
                        kernel_size: int, simulation_i: int) -> Tuple:
    print(f'\n{datetime.now().strftime("%m-%d-%Y_%H.%M")} Model: CNN k={kernel_size}'
          + f' simulation {simulation_i + 1}/{simulation_number}')

    # Reshape flat 784-vectors into 28x28 images for the convolutional layer.
    x_train = np.array([np.reshape(x, (28, 28)) for x in x_train])
    x_val = np.array([np.reshape(x, (28, 28)) for x in x_val])

    output_feature_map_dim = math.floor((28 - kernel_size + 2 * padding) / stride + 1)
    if max_pooling:
        output_feature_map_dim = math.floor(output_feature_map_dim / 2)

    conv_net = ConvolutionalNet(input_dim=(28, 28),
                                kernel_number=kernel_number,
                                kernel_size=kernel_size,
                                fc_input_dim=kernel_number * output_feature_map_dim**2,
                                output_dim=10,
                                hidden_dims=[128],
                                activation_functions=[fc_act_function],
                                optimizer=Adam(learning_rate=learning_rate),
                                initializer=HeInitializer())

    sim_overall_epoch_num, sim_training_losses, sim_validation_losses, sim_validation_accuracies = \
        train_model(conv_net, x_train, y_train,
                    batch_size=batch_size, max_epochs=max_epochs,
                    x_val=x_val, y_val=y_val, plot=False)

    return sim_overall_epoch_num, sim_training_losses, sim_validation_losses, sim_validation_accuracies
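# Sanity check for the feature-map arithmetic above: the standard convolution
# output size is floor((W - K + 2P) / S + 1). Assuming the module-level
# defaults padding=1 and stride=1, a kernel_size of 3 on a 28x28 input gives
#   floor((28 - 3 + 2*1) / 1 + 1) = 28, halved to 14 by 2x2 max pooling,
# so fc_input_dim = kernel_number * 14**2. A hypothetical helper (not part of
# the original code) mirroring this computation:
def _feature_map_dim(input_dim: int, kernel_size: int, padding: int,
                     stride: int, max_pooling: bool) -> int:
    dim = math.floor((input_dim - kernel_size + 2 * padding) / stride + 1)
    return math.floor(dim / 2) if max_pooling else dim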
def get_results_for_initializer(initializer_name: str, x_train: np.ndarray, x_val: np.ndarray,
                                y_train: np.ndarray, y_val: np.ndarray) -> Dict:
    epochs_num = []
    training_losses = []
    validation_losses = []
    validation_accuracies = []

    for i in range(simulation_number):
        print(f'\n{datetime.now().strftime("%m-%d-%Y_%H.%M")} Initializer: {initializer_name}'
              + f' simulation {i + 1}/{simulation_number}')
        initializer = _get_initializer_by_name(initializer_name)
        mlp_model = MLP(input_dim=784,
                        output_dim=10,
                        hidden_dims=hidden_dims,
                        activation_functions=[act_function],
                        optimizer=optimizer,
                        initializer=initializer)
        sim_overall_epoch_num, sim_training_losses, sim_validation_losses, sim_validation_accuracies = \
            train_model(mlp_model, x_train, y_train,
                        batch_size=batch_size, max_epochs=max_epochs,
                        x_val=x_val, y_val=y_val, plot=False)
        epochs_num.append(sim_overall_epoch_num)
        training_losses.append(sim_training_losses)
        validation_losses.append(sim_validation_losses)
        validation_accuracies.append(sim_validation_accuracies)

    return {'epochs': epochs_num,
            'train_losses': training_losses,
            'val_losses': validation_losses,
            'val_acc': validation_accuracies,
            'optimizer': optimizer}
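# _get_initializer_by_name is used above but defined elsewhere. A minimal
# sketch of the mapping it presumably performs, assuming initializer classes
# analogous to the HeInitializer used in the CNN experiments (the
# commented-out class names below are assumptions, not confirmed by this
# file):
def _get_initializer_by_name_sketch(initializer_name: str):
    initializers = {
        'he': HeInitializer,
        # 'xavier': XavierInitializer,  # assumed class name
        # 'normal': NormalInitializer,  # assumed class name
    }
    return initializers[initializer_name]()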
def analyze_activation_functions():
    x_train, y_train, x_val, y_val, x_test, y_test = load_data_wrapper()

    simulation_number = 5
    max_epochs = 7
    batch_size = 50
    weight_sd = 1.0
    learning_rate = 1e-1

    act_functions = [sigmoid, relu]
    act_functions_names = ['sigmoid', 'relu']
    training_data_dictionary = {}

    for act_fn, act_fn_name in zip(act_functions, act_functions_names):
        epochs_num = []
        training_losses = []
        validation_losses = []
        validation_accuracies = []
        for i in range(simulation_number):
            print(f'\nActivation function: {act_fn_name}, simulation {i + 1}/{simulation_number}')
            mlp_model = MLP(input_dim=784,
                            output_dim=10,
                            hidden_dims=[30],
                            activation_functions=[act_fn],
                            init_parameters_sd=weight_sd,
                            optimizer=SGD(learning_rate=learning_rate))
            sim_overall_epoch_num, sim_training_losses, sim_validation_losses, sim_validation_accuracies = \
                train_model(mlp_model, x_train, y_train,
                            batch_size=batch_size, max_epochs=max_epochs,
                            x_val=x_val, y_val=y_val, plot=False)
            epochs_num.append(sim_overall_epoch_num)
            training_losses.append(sim_training_losses)
            validation_losses.append(sim_validation_losses)
            validation_accuracies.append(sim_validation_accuracies)
        training_data_dictionary[act_fn_name] = {'epochs': epochs_num,
                                                 'train_losses': training_losses,
                                                 'val_losses': validation_losses,
                                                 'val_acc': validation_accuracies}

    file_name = f'act_functions_analysis_data_{act_functions_names}_{datetime.now().strftime("%m-%d-%Y_%H.%M")}.pkl'
    with open(file_name, 'wb') as handle:
        pkl.dump(training_data_dictionary, handle, protocol=pkl.HIGHEST_PROTOCOL)

    plot_losses_results(training_data_dictionary)
    plot_accuracies_results(training_data_dictionary)
def analyze_number_of_neurons():
    x_train, y_train, x_val, y_val, x_test, y_test = load_data_wrapper()

    simulation_number = 5
    learning_rate = 1e-1
    batch_size = 50
    max_epochs = 7

    hidden_neurons_numbers = [30, 100, 300, 500]
    training_data_dictionary = {}

    for neurons_number in hidden_neurons_numbers:
        epochs_num = []
        training_losses = []
        validation_losses = []
        validation_accuracies = []
        for i in range(simulation_number):
            print(f'\nHidden neurons: {neurons_number}, simulation {i + 1}/{simulation_number}')
            mlp_model = MLP(input_dim=784,
                            output_dim=10,
                            hidden_dims=[neurons_number],
                            activation_functions=[sigmoid],
                            init_parameters_sd=1,
                            optimizer=SGD(learning_rate=learning_rate))
            sim_overall_epoch_num, sim_training_losses, sim_validation_losses, sim_validation_accuracies = \
                train_model(mlp_model, x_train, y_train,
                            batch_size=batch_size, max_epochs=max_epochs,
                            x_val=x_val, y_val=y_val, plot=False)
            epochs_num.append(sim_overall_epoch_num)
            training_losses.append(sim_training_losses)
            validation_losses.append(sim_validation_losses)
            validation_accuracies.append(sim_validation_accuracies)
        training_data_dictionary[neurons_number] = {'epochs': epochs_num,
                                                    'train_losses': training_losses,
                                                    'val_losses': validation_losses,
                                                    'val_acc': validation_accuracies}

    file_name = f'neuron_numbers_analysis_data_{hidden_neurons_numbers}_{datetime.now().strftime("%m-%d-%Y_%H.%M")}.pkl'
    with open(file_name, 'wb') as handle:
        pkl.dump(training_data_dictionary, handle, protocol=pkl.HIGHEST_PROTOCOL)

    plot_losses_results(training_data_dictionary)
    plot_accuracies_results(training_data_dictionary)
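# Both analysis functions above pickle their results dictionary, so the data
# can be reloaded and re-plotted later without retraining. A small
# illustrative loader (the file name is whatever the analysis run saved):
def load_analysis_data(file_name: str) -> dict:
    with open(file_name, 'rb') as handle:
        return pkl.load(handle)
#
# Example:
#   data = load_analysis_data('neuron_numbers_analysis_data_[30, 100, 300, 500]_<timestamp>.pkl')
#   plot_losses_results(data)
#   plot_accuracies_results(data)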
def get_results_for_mlp(x_train: np.ndarray, x_val: np.ndarray,
                        y_train: np.ndarray, y_val: np.ndarray,
                        simulation_i: int) -> Tuple:
    print(f'\n{datetime.now().strftime("%m-%d-%Y_%H.%M")} Model: MLP'
          + f' simulation {simulation_i + 1}/{simulation_number}')

    mlp_model = MLP(input_dim=784,
                    output_dim=10,
                    hidden_dims=mlp_hidden_dims,
                    activation_functions=[fc_act_function],
                    optimizer=Adam(learning_rate=learning_rate),
                    initializer=HeInitializer())

    sim_overall_epoch_num, sim_training_losses, sim_validation_losses, sim_validation_accuracies = \
        train_model(mlp_model, x_train, y_train,
                    batch_size=batch_size, max_epochs=max_epochs,
                    x_val=x_val, y_val=y_val, plot=False)

    return sim_overall_epoch_num, sim_training_losses, sim_validation_losses, sim_validation_accuracies
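# get_results_for_mlp and get_results_for_cnn both read experiment settings
# from module level (simulation_number, batch_size, max_epochs, learning_rate,
# mlp_hidden_dims, fc_act_function, ...), so the two architectures are
# compared under matched training budgets. A plausible driver loop, sketched
# only as a comment since the real one is not shown in this file:
#   for i in range(simulation_number):
#       mlp_results = get_results_for_mlp(x_train, x_val, y_train, y_val, i)
#       cnn_results = get_results_for_cnn(x_train, x_val, y_train, y_val, kernel_size, i)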
def run_training():
    x_train, y_train, x_val, y_val, x_test, y_test = load_data_wrapper()

    # Reshape flat 784-vectors into 28x28 images for the convolutional layer.
    x_train = np.array([np.reshape(x, (28, 28)) for x in x_train])
    x_val = np.array([np.reshape(x, (28, 28)) for x in x_val])
    x_test = np.array([np.reshape(x, (28, 28)) for x in x_test])

    # Uncomment to train on a small subset for quick debugging.
    # x_train = x_train[:5000]
    # y_train = y_train[:5000]
    # x_val = x_val[:500]
    # y_val = y_val[:500]

    learning_rate = 5e-3
    batch_size = 50
    max_epochs = 7

    kernel_number = 4
    kernel_size = 5
    padding = 1
    stride = 1
    max_pooling = True

    output_feature_map_dim = math.floor((28 - kernel_size + 2 * padding) / stride + 1)
    if max_pooling:
        output_feature_map_dim = math.floor(output_feature_map_dim / 2)

    conv_net = ConvolutionalNet(input_dim=(28, 28),
                                kernel_number=kernel_number,
                                kernel_size=kernel_size,
                                fc_input_dim=kernel_number * output_feature_map_dim**2,
                                output_dim=10,
                                hidden_dims=[128],
                                activation_functions=[relu],
                                optimizer=Adam(learning_rate=learning_rate),
                                initializer=HeInitializer())
    print(conv_net)

    index = 1
    x, y = x_test[index, :], y_test[index, :]

    y_hat = conv_net(x)
    print('Before learning')
    print(f'y_real:\n{y}')
    print(f'\ny_hat:\n{y_hat}')

    train_model(conv_net, x_train, y_train,
                batch_size=batch_size, max_epochs=max_epochs,
                x_val=x_val, y_val=y_val, plot=True)

    y_hat = conv_net(x)
    print('After learning')
    print(f'y_real:\n{y}')
    print(f'\ny_hat:\n{y_hat}')

    evaluate_model(conv_net, x_test, y_test)
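# With the concrete settings in run_training above (kernel_size=5, padding=1,
# stride=1, max_pooling=True), the feature-map arithmetic works out to:
#   floor((28 - 5 + 2*1) / 1 + 1) = 26  ->  floor(26 / 2) = 13 after pooling,
# so fc_input_dim = kernel_number * 13**2 = 4 * 169 = 676.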