import pickle

import numpy as np
import torch
from torch.autograd import Variable

# Project-specific helpers (create_new_data, create_new_dataset, save_performance_results,
# AutoEncoderTrain, etc.) are assumed to be imported from the surrounding bayesian_prolo package.


def evaluate_on_test_data(model, schedules_trained_on, load_in_model=False):
    """
    Evaluate the performance of a network trained with the alpha-divergence loss.
    Note: this function is called after training has converged.
    :return:
    """
    num_schedules = 100
    # load in new data
    load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
        num_schedules) + 'test_dist_early_hili_pairwise.pkl'

    data = pickle.load(open(load_directory, "rb"))
    X, Y, schedule_array = create_new_data(num_schedules, data)

    prediction_accuracy = [0, 0]
    percentage_accuracy_top1 = []
    percentage_accuracy_top3 = []

    for j in range(0, num_schedules):
        schedule_bounds = schedule_array[j]
        step = schedule_bounds[0]
        while step < schedule_bounds[1]:
            probability_matrix = np.zeros((20, 20))

            for m, counter in enumerate(range(step, step + 20)):
                phi_i = X[counter]
                phi_i_numpy = np.asarray(phi_i)

                # for each set of twenty
                for n, second_counter in enumerate(range(step, step + 20)):
                    # fill the entire matrix, with the diagonal left at zero
                    if second_counter == counter:  # same as m == n
                        continue
                    phi_j = X[second_counter]
                    phi_j_numpy = np.asarray(phi_j)

                    feature_input = phi_i_numpy - phi_j_numpy

                    if torch.cuda.is_available():
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    else:
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))

                    # push through nets
                    # TODO: check whether this assignment gives the same result as the commented line below
                    preference_prob = model.forward(feature_input)
                    probability_matrix[m][n] = preference_prob[0].data.detach()[0].item()
                    # probability_matrix[n][m] = preference_prob[0].data.detach()[1].item()

            # set of twenty is completed
            column_vec = np.sum(probability_matrix, axis=1)

            # top 1: given all inputs and their likelihood of being scheduled, predict the output
            highest_val = max(column_vec)
            all_indexes_that_have_highest_val = [i for i, e in enumerate(list(column_vec)) if e == highest_val]
            if len(all_indexes_that_have_highest_val) > 1:
                print('length of indexes greater than 1: ', all_indexes_that_have_highest_val)
            choice = np.random.choice(all_indexes_that_have_highest_val)
            # choice = np.argmax(column_vec)

            # top 3
            _, top_three = torch.topk(torch.Tensor(column_vec), 3)

            # compare against the ground truth
            truth = Y[step]

            # index top 1
            if choice == truth:
                prediction_accuracy[0] += 1

            # index top 3
            if truth in top_three:
                prediction_accuracy[1] += 1

            # move on to the next set of twenty
            step += 20

        # schedule finished
        print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20, ' top3: ', prediction_accuracy[1] / 20)
        print('schedule num:', j)
        percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
        percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
        prediction_accuracy = [0, 0]

    save_performance_results(percentage_accuracy_top1, percentage_accuracy_top3, 'NN_pairwise.pkl')
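
# The evaluator above scores each of the 20 candidate tasks by summing its pairwise preference
# probabilities against the other 19 candidates, then takes the argmax (top 1) and torch.topk (top 3).
# The snippet below is a minimal, self-contained sketch of that scoring step on random data;
# `toy_model`, `pairwise_scores_sketch`, and the 13-dimensional features are illustrative assumptions,
# not part of the project code.
def pairwise_scores_sketch(model, features):
    """features: (20, 13) array of task features for one timestep."""
    probability_matrix = np.zeros((20, 20))
    for m in range(20):
        for n in range(20):
            if m == n:
                continue  # diagonal stays zero: a task is never compared with itself
            diff = torch.Tensor((features[m] - features[n]).reshape(1, 13))
            with torch.no_grad():
                pref = model(diff)  # P(task m preferred over task n)
            probability_matrix[m][n] = pref[0][0].item()
    column_vec = probability_matrix.sum(axis=1)  # aggregate pairwise evidence per task
    top1 = int(np.argmax(column_vec))
    _, top3 = torch.topk(torch.Tensor(column_vec), 3)
    return top1, top3


# illustrative usage with a stand-in two-output preference network
toy_model = torch.nn.Sequential(torch.nn.Linear(13, 2), torch.nn.Softmax(dim=1))
top1_guess, top3_guess = pairwise_scores_sketch(toy_model, np.random.randn(20, 13))
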
def evaluate_on_test_data(self, models, schedules_trained_on, load_in_model=False):
    """
    Evaluate the performance of networks trained with the alpha-divergence loss.
    Note: this function is called after training has converged.
    :return:
    """
    autoencoder_class = AutoEncoderTrain(150)
    checkpoint = torch.load('/home/ghost/PycharmProjects/bayesian_prolo/saved_models/Autoencoder150.tar')
    autoencoder_class.model.load_state_dict(checkpoint['nn_state_dict'])
    states = self.create_iterables()

    load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
        100) + 'test_dist_early_hili_naive.pkl'
    data = pickle.load(open(load_directory, "rb"))
    X_naive, Y_naive, schedule_array = create_new_dataset(data, 100)
    for i, each_element in enumerate(X_naive):
        X_naive[i] = each_element + list(range(20))

    num_schedules = 100
    # load in new data
    load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
        num_schedules) + 'test_dist_early_hili_pairwise.pkl'
    data = pickle.load(open(load_directory, "rb"))
    X, Y, schedule_array = create_new_data(num_schedules, data)

    prediction_accuracy = [0, 0]
    percentage_accuracy_top1 = []
    percentage_accuracy_top3 = []

    # threshold values used to binarise each new autoencoder encoding
    mean_input = [1.3277743, 0.32837677, 1.4974482, -1.3519306, -0.64621973, 0.10534518,
                  -2.338118, -2.7345326, 1.7558736, -3.0746384, -3.485554]

    for j in range(0, num_schedules):
        current_schedule_matrix = np.zeros((2048, 20))
        schedule_bounds = schedule_array[j]
        step = schedule_bounds[0]
        while step < schedule_bounds[1]:
            probability_matrix = np.zeros((20, 20))

            # pick the network for this timestep from the schedule summary accumulated so far
            if current_schedule_matrix.sum() == 0:
                cluster_num = self.kmeans_model.predict(current_schedule_matrix.reshape(1, -1))
            else:
                matrix = np.divide(current_schedule_matrix, current_schedule_matrix.sum())
                cluster_num = self.kmeans_model.predict(matrix.reshape(1, -1))

            for m, counter in enumerate(range(step, step + 20)):
                phi_i = X[counter]
                phi_i_numpy = np.asarray(phi_i)

                # for each set of twenty
                for n, second_counter in enumerate(range(step, step + 20)):
                    # fill the entire matrix, with the diagonal left at zero
                    if second_counter == counter:  # same as m == n
                        continue
                    phi_j = X[second_counter]
                    phi_j_numpy = np.asarray(phi_j)

                    feature_input = phi_i_numpy - phi_j_numpy

                    if torch.cuda.is_available():
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    else:
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))

                    # push through the selected network to get preferences
                    # TODO: check whether this assignment gives the same result as the commented line below
                    preference_prob = self.models[int(cluster_num)].forward(feature_input)
                    probability_matrix[m][n] = preference_prob[0].data.detach()[0].item()
                    # probability_matrix[n][m] = preference_prob[0].data.detach()[1].item()

            # set of twenty is completed
            column_vec = np.sum(probability_matrix, axis=1)

            # top 1
            highest_val = max(column_vec)
            all_indexes_that_have_highest_val = [i for i, e in enumerate(list(column_vec)) if e == highest_val]
            if len(all_indexes_that_have_highest_val) > 1:
                print('length of indexes greater than 1: ', all_indexes_that_have_highest_val)
            choice = np.random.choice(all_indexes_that_have_highest_val)
            # choice = np.argmax(column_vec)

            # top 3
            _, top_three = torch.topk(torch.Tensor(column_vec), 3)

            # compare against the ground truth
            truth = Y[step]

            # index top 1
            if choice == truth:
                prediction_accuracy[0] += 1

            # index top 3
            if truth in top_three:
                prediction_accuracy[1] += 1

            # update the schedule summary with the observed state/action pair
            embedding_copy = np.zeros((1, 11))
            input_element = autoencoder_class.model.forward_only_encoding(
                Variable(torch.Tensor(np.asarray(X_naive[int(step / 20)]).reshape(1, 242)).cuda()))
            for z, each_element in enumerate(mean_input):
                if each_element > input_element[0][z].item():
                    embedding_copy[0][z] = 0
                else:
                    embedding_copy[0][z] = 1
            index = self.pass_in_embedding_out_state_ID(states, embedding_copy[0])
            action = Y[step]
            current_schedule_matrix[index][int(action)] += 1

            # move on to the next set of twenty
            step += 20

        # schedule finished
        print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20, ' top3: ', prediction_accuracy[1] / 20)
        print('schedule num:', j)
        percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
        percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
        prediction_accuracy = [0, 0]

    save_performance_results(percentage_accuracy_top1, percentage_accuracy_top3, 'pairwise_NN_kmeans.pkl')
    return percentage_accuracy_top1
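
# In the method above, each schedule is summarised online as a 2048 x 20 count matrix (state index
# x action), which is normalised by its total and fed to a pre-fit k-means model to decide which
# per-cluster network scores the current timestep. Below is a minimal scikit-learn sketch of that
# selection step; `fitted_kmeans`, `select_cluster_sketch`, and the random training summaries are
# illustrative assumptions, while the 2048 x 20 shape and zero-sum special case mirror the code above.
from sklearn.cluster import KMeans


def select_cluster_sketch(fitted_kmeans, schedule_counts):
    """schedule_counts: (2048, 20) array of state/action visit counts observed so far."""
    if schedule_counts.sum() == 0:
        # no observations yet: query k-means with the raw (all-zero) matrix, as the code above does
        flat = schedule_counts.reshape(1, -1)
    else:
        flat = (schedule_counts / schedule_counts.sum()).reshape(1, -1)
    return int(fitted_kmeans.predict(flat)[0])


# illustrative usage: fit k-means on random stand-in schedule summaries, then select a cluster
fitted_kmeans = KMeans(n_clusters=3, n_init=10).fit(np.random.rand(30, 2048 * 20))
cluster_id = select_cluster_sketch(fitted_kmeans, np.zeros((2048, 20)))
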
def evaluate_on_test_data(self, model):
    """
    Evaluate the performance of a network trained with the alpha-divergence loss.
    Note: this function is called after training has converged.
    :return:
    """
    num_schedules = 100
    # load in new data
    load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
        num_schedules) + 'test_dist_early_hili_pairwise.pkl'

    # the embedding updates below use self.embedding_optimizer, assumed to be set up in the constructor
    # embedding_optimizer = torch.optim.SGD(model.EmbeddingList.parameters(), lr=.4)
    criterion = torch.nn.BCELoss()

    data = pickle.load(open(load_directory, "rb"))
    X, Y, schedule_array = create_new_data(num_schedules, data)

    embedding_list = [torch.ones(3) * 1 / 3 for i in range(num_schedules)]

    prediction_accuracy = [0, 0]
    percentage_accuracy_top1 = []
    percentage_accuracy_top3 = []

    for j in range(0, num_schedules):
        schedule_bounds = schedule_array[j]
        step = schedule_bounds[0]
        # for each schedule, set the embedding
        model.set_bayesian_embedding(embedding_list[j])
        while step < schedule_bounds[1]:
            probability_vector = np.zeros((1, 20))

            for m, counter in enumerate(range(step, step + 20)):
                phi_i = X[counter]
                phi_i_numpy = np.asarray(phi_i)
                feature_input = phi_i_numpy

                if torch.cuda.is_available():
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                else:
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))

                # push through nets
                # TODO: check whether this assignment gives the same result as the commented line below
                preference_prob = model.forward(feature_input)
                probability_vector[0][m] = preference_prob[0].data.detach()[0].item()
                # probability_matrix[n][m] = preference_prob[0].data.detach()[1].item()

            # given all inputs, and their likelihood of being scheduled, predict the output
            highest_val = max(probability_vector[0])
            all_indexes_that_have_highest_val = [i for i, e in enumerate(list(probability_vector[0])) if e == highest_val]
            if len(all_indexes_that_have_highest_val) > 1:
                print('length of indexes greater than 1: ', all_indexes_that_have_highest_val)
            # top 1
            choice = np.random.choice(all_indexes_that_have_highest_val)
            # choice = np.argmax(probability_vector)

            # top 3
            _, top_three = torch.topk(torch.Tensor(probability_vector), 3)

            truth = Y[step]

            # index top 1
            if choice == truth:
                prediction_accuracy[0] += 1

            # index top 3
            if truth in top_three:
                prediction_accuracy[1] += 1

            # update loop
            truth = Y[step]
            phi_i_num = truth + step  # old method: set_of_twenty[0] + truth

            for counter in range(step, step + 20):
                if counter == phi_i_num:
                    label = torch.ones((1, 1))
                else:
                    label = torch.zeros((1, 1))
                phi_j = X[counter]
                phi = np.asarray(phi_j)
                feature_input = phi

                if torch.cuda.is_available():
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    label = Variable(torch.Tensor(label).cuda())
                else:
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))
                    label = Variable(torch.Tensor(label.reshape(1, 1)))

                output = model.forward(feature_input)
                self.embedding_optimizer.zero_grad()
                loss = criterion(output, label)
                loss.backward()
                # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
                self.embedding_optimizer.step()
                print(model.EmbeddingList.state_dict())

            # cache the updated embedding for this schedule
            embedding_list[j] = torch.Tensor(model.get_bayesian_embedding().detach().cpu().numpy()[0])  # very ugly

            # move on to the next set of twenty
            step += 20

        # schedule finished
        print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20, ' top3: ', prediction_accuracy[1] / 20)
        print('schedule num:', j)
        percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
        percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
        prediction_accuracy = [0, 0]

    save_performance_results(percentage_accuracy_top1, percentage_accuracy_top3, 'pointwise_NN_unimodal.pkl')
    return percentage_accuracy_top1
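
# The method above adapts a per-schedule "Bayesian embedding" at test time: after each prediction it
# runs one pass of BCE updates over the 20 candidates (label 1 for the task that was actually
# scheduled, 0 for the rest), optimising only the embedding parameters. The sketch below shows that
# update in isolation; `EmbeddingNetSketch`, `update_embedding_sketch`, and the 13-dimensional
# features are illustrative assumptions standing in for the project's model classes.
class EmbeddingNetSketch(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.embedding = torch.nn.Parameter(torch.ones(3) / 3)  # per-schedule latent variable
        self.net = torch.nn.Linear(13 + 3, 1)

    def forward(self, x):
        # concatenate the task features with the current embedding and squash to a probability
        return torch.sigmoid(self.net(torch.cat([x, self.embedding.unsqueeze(0)], dim=1)))


def update_embedding_sketch(model, optimizer, features, truth_index):
    """features: (20, 13); truth_index: which of the 20 tasks was actually scheduled."""
    criterion = torch.nn.BCELoss()
    for counter in range(20):
        label = torch.ones((1, 1)) if counter == truth_index else torch.zeros((1, 1))
        x = torch.Tensor(features[counter].reshape(1, 13))
        optimizer.zero_grad()
        loss = criterion(model(x), label)
        loss.backward()
        optimizer.step()


# illustrative usage: only the embedding parameter is passed to the optimizer
sketch_model = EmbeddingNetSketch()
sketch_opt = torch.optim.SGD([sketch_model.embedding], lr=1e-3)
update_embedding_sketch(sketch_model, sketch_opt, np.random.randn(20, 13).astype(np.float32), truth_index=4)
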
def evaluate_on_test_data(self, models, schedules_trained_on):
    """
    Evaluate the performance of networks trained with the alpha-divergence loss.
    Note: this function is called after training has converged.
    :return:
    """
    num_schedules = 75
    # load in new data
    load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
        num_schedules) + 'test_high_low_hetero_deadline_pairwise.pkl'

    data = pickle.load(open(load_directory, "rb"))
    X, Y, schedule_array = create_new_data(num_schedules, data)

    # take a side step and do some of the clustering stuff
    autoencoder_class = AutoEncoderTrain(num_schedules)
    autoencoder_class.model.load('/home/ghost/PycharmProjects/bayesian_prolo/saved_models/Autoencoder' + str(schedules_trained_on) + '.tar')
    autoencoder_class.compute_mean()
    autoencoder_class.create_iterables()
    autoencoder_class.round_each_encoding_and_create_array()
    autoencoder_class.populate_a_matrix_per_schedule()
    test_matrices = autoencoder_class.save_matrices()

    gmm_model, labels = self.cluster_matrices(test_matrices, num_schedules)

    prediction_accuracy = [0, 0]
    percentage_accuracy_top1 = []
    percentage_accuracy_top3 = []

    for j in range(0, num_schedules):
        schedule_bounds = schedule_array[j]
        step = schedule_bounds[0]
        # evaluate this schedule with the network trained for its cluster
        self.model = models[labels[j]]
        while step < schedule_bounds[1]:
            probability_matrix = np.zeros((20, 20))

            for m, counter in enumerate(range(step, step + 20)):
                phi_i = X[counter]
                phi_i_numpy = np.asarray(phi_i)

                # for each set of twenty
                for n, second_counter in enumerate(range(step, step + 20)):
                    # fill the entire matrix, with the diagonal left at zero
                    if second_counter == counter:  # same as m == n
                        continue
                    phi_j = X[second_counter]
                    phi_j_numpy = np.asarray(phi_j)

                    feature_input = phi_i_numpy - phi_j_numpy

                    if torch.cuda.is_available():
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    else:
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))

                    # push through nets
                    # TODO: check whether this assignment gives the same result as the commented line below
                    preference_prob = self.model.forward(feature_input)
                    probability_matrix[m][n] = preference_prob[0].data.detach()[0].item()
                    # probability_matrix[n][m] = preference_prob[0].data.detach()[1].item()

            # set of twenty is completed
            column_vec = np.sum(probability_matrix, axis=1)

            # top 1
            choice = np.argmax(column_vec)

            # top 3
            _, top_three = torch.topk(torch.Tensor(column_vec), 3)

            # compare against the ground truth
            truth = Y[step]

            # index top 1
            if choice == truth:
                prediction_accuracy[0] += 1

            # index top 3
            if truth in top_three:
                prediction_accuracy[1] += 1

            # move on to the next set of twenty
            step += 20

        # schedule finished
        print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20, ' top3: ', prediction_accuracy[1] / 20)
        print('schedule num:', j)
        percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
        percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
        prediction_accuracy = [0, 0]

    save_performance_results(percentage_accuracy_top1, percentage_accuracy_top3,
                             'HIFI_LIFI_gmm_nn_small_pairwise' + str(schedules_trained_on) + '.pkl')
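
# The variant above summarises each test schedule through the autoencoder pipeline, clusters the
# summary matrices with a Gaussian mixture model (via self.cluster_matrices), and then scores every
# schedule with the network belonging to its cluster. Below is a minimal scikit-learn sketch of that
# select-by-cluster step; the small random summaries, the three components, and `per_cluster_models`
# are illustrative assumptions, not the project's actual data or models.
from sklearn.mixture import GaussianMixture

summaries = np.random.rand(12, 30)  # stand-in per-schedule summary vectors
gmm_sketch = GaussianMixture(n_components=3, covariance_type='diag', random_state=0).fit(summaries)
sketch_labels = gmm_sketch.predict(summaries)  # one cluster label per schedule

per_cluster_models = {0: 'net_for_cluster_0', 1: 'net_for_cluster_1', 2: 'net_for_cluster_2'}
for schedule_idx, label in enumerate(sketch_labels):
    # evaluate schedule `schedule_idx` with the model trained for its cluster
    model_for_schedule = per_cluster_models[int(label)]
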
def evaluate_on_test_data(self):
    """
    Evaluate the performance of a network trained with the alpha-divergence loss.
    Note: this function is called after training has converged.
    :return:
    """
    num_schedules = 100
    # load in new data
    load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
        num_schedules) + 'test_dist_early_hili_pairwise.pkl'

    data = pickle.load(open(load_directory, "rb"))
    X, Y, schedule_array = create_new_data(num_schedules, data)

    # define embedding things
    embedding_list = [torch.ones(3) * 1 / 3 for i in range(num_schedules)]

    # learning rate increased
    # NOTE: the updates below use self.embedding_optimizer (assumed to be set up in the constructor);
    # the optimizer constructed here as self.opt is not referenced again in this function.
    self.opt = torch.optim.SGD(self.model.EmbeddingList.parameters(), lr=.0001)
    criterion = torch.nn.BCELoss()

    prediction_accuracy = [0, 0]
    percentage_accuracy_top1 = []
    percentage_accuracy_top3 = []

    for j in range(0, num_schedules):
        schedule_bounds = schedule_array[j]
        step = schedule_bounds[0]
        self.model.set_bayesian_embedding(embedding_list[j])
        while step < schedule_bounds[1]:
            probability_matrix = np.zeros((20, 20))

            for m, counter in enumerate(range(step, step + 20)):
                phi_i = X[counter]
                phi_i_numpy = np.asarray(phi_i)

                # for each set of twenty
                for n, second_counter in enumerate(range(step, step + 20)):
                    # fill the entire matrix, with the diagonal left at zero
                    if second_counter == counter:  # same as m == n
                        continue
                    phi_j = X[second_counter]
                    phi_j_numpy = np.asarray(phi_j)

                    feature_input = phi_i_numpy - phi_j_numpy

                    if torch.cuda.is_available():
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    else:
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))

                    # push through nets to get preferences
                    # TODO: check whether this assignment gives the same result as the commented line below
                    preference_prob = self.model.forward(feature_input)
                    probability_matrix[m][n] = preference_prob[0].data.detach()[0].item()
                    # probability_matrix[n][m] = preference_prob[0].data.detach()[1].item()

            # set of twenty is completed
            column_vec = np.sum(probability_matrix, axis=1)

            # top 1
            highest_val = max(column_vec)
            all_indexes_that_have_highest_val = [i for i, e in enumerate(list(column_vec)) if e == highest_val]
            if len(all_indexes_that_have_highest_val) > 1:
                print('length of indexes greater than 1: ', all_indexes_that_have_highest_val)
            choice = np.random.choice(all_indexes_that_have_highest_val)

            # top 3
            _, top_three = torch.topk(torch.Tensor(column_vec), 3)

            truth = Y[step]

            # index top 1
            if choice == truth:
                prediction_accuracy[0] += 1

            # index top 3
            if truth in top_three:
                prediction_accuracy[1] += 1

            # then do the embedding update loop
            phi_i_num = truth + step
            phi_i = X[phi_i_num]
            phi_i_numpy = np.asarray(phi_i)

            # iterate over pairwise comparisons: truth minus other should be preferred (label 1)
            for counter in range(step, step + 20):
                if counter == phi_i_num:
                    continue
                phi_j = X[counter]
                phi_j_numpy = np.asarray(phi_j)
                feature_input = phi_i_numpy - phi_j_numpy

                if torch.cuda.is_available():
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    label = Variable(torch.Tensor(torch.ones((1, 1))).cuda())
                else:
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))
                    label = Variable(torch.Tensor(torch.ones((1, 1))))

                output = self.model(feature_input)
                loss = criterion(output, label)
                # prepare optimizer, compute gradient, update params
                self.embedding_optimizer.zero_grad()
                loss.backward()
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                self.embedding_optimizer.step()
                print(self.model.EmbeddingList.state_dict())

            # other minus truth should not be preferred (label 0)
            for counter in range(step, step + 20):
                if counter == phi_i_num:
                    continue
                phi_j = X[counter]
                phi_j_numpy = np.asarray(phi_j)
                feature_input = phi_j_numpy - phi_i_numpy

                if torch.cuda.is_available():
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                    label = Variable(torch.Tensor(torch.zeros((1, 1))).cuda())
                else:
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))
                    label = Variable(torch.Tensor(torch.zeros((1, 1))))

                output = self.model.forward(feature_input)
                self.embedding_optimizer.zero_grad()
                loss = criterion(output, label)
                loss.backward()
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                self.embedding_optimizer.step()

            # move on to the next set of twenty
            step += 20

        # schedule finished
        print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20, ' top3: ', prediction_accuracy[1] / 20)
        print('schedule num:', j)
        percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
        percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
        prediction_accuracy = [0, 0]

    save_performance_results(percentage_accuracy_top1, percentage_accuracy_top3, 'NN_w_embedding_pairwise.pkl')
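
# At test time the method above refines the per-schedule embedding with pairwise BCE updates in both
# directions: (truth - other) should score 1 and (other - truth) should score 0. The sketch below
# isolates that two-direction labelling scheme with a stand-in scoring network (a single linear layer
# with a sigmoid); `two_direction_update_sketch` and the 13-dimensional features are illustrative
# assumptions, and the real code updates only the model's embedding parameters rather than the whole net.
def two_direction_update_sketch(model, optimizer, features, truth_index):
    """features: (20, 13); truth_index: index of the task that was actually scheduled."""
    criterion = torch.nn.BCELoss()
    phi_i = features[truth_index]
    for counter in range(20):
        if counter == truth_index:
            continue
        phi_j = features[counter]
        # truth minus other gets label 1; other minus truth gets label 0
        for diff, target in ((phi_i - phi_j, 1.0), (phi_j - phi_i, 0.0)):
            x = torch.Tensor(diff.reshape(1, 13))
            label = torch.full((1, 1), target)
            optimizer.zero_grad()
            loss = criterion(model(x), label)
            loss.backward()
            optimizer.step()


# illustrative usage with random features and a randomly chosen ground-truth task
pairwise_sketch_model = torch.nn.Sequential(torch.nn.Linear(13, 1), torch.nn.Sigmoid())
two_direction_update_sketch(pairwise_sketch_model,
                            torch.optim.SGD(pairwise_sketch_model.parameters(), lr=1e-4),
                            np.random.randn(20, 13).astype(np.float32), truth_index=7)
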
def evaluate_on_test_data(model, schedules_trained_on, load_in_model=False):
    """
    Evaluate the performance of a network trained with the alpha-divergence loss.
    Note: this function is called after training has converged.
    :return:
    """
    num_schedules = 100
    # load in new data
    load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
        num_schedules) + 'test_dist_early_hili_pairwise.pkl'

    data = pickle.load(open(load_directory, "rb"))
    X, Y, schedule_array = create_new_data(num_schedules, data)

    prediction_accuracy = [0, 0]
    percentage_accuracy_top1 = []
    percentage_accuracy_top3 = []

    for j in range(0, num_schedules):
        schedule_bounds = schedule_array[j]
        step = schedule_bounds[0]
        while step < schedule_bounds[1]:
            probability_vector = np.zeros((1, 20))

            for m, counter in enumerate(range(step, step + 20)):
                phi_i = X[counter]
                phi_i_numpy = np.asarray(phi_i)
                feature_input = phi_i_numpy

                if torch.cuda.is_available():
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                else:
                    feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))

                # push through nets
                # TODO: check whether this assignment gives the same result as the commented line below
                preference_prob = model.forward(feature_input)
                probability_vector[0][m] = preference_prob[0].data.detach()[0].item()
                # probability_matrix[n][m] = preference_prob[0].data.detach()[1].item()

            print(probability_vector)

            # top 1
            highest_val = max(probability_vector[0])
            all_indexes_that_have_highest_val = [i for i, e in enumerate(list(probability_vector[0])) if e == highest_val]
            choice = np.random.choice(all_indexes_that_have_highest_val)
            # choice = np.argmax(probability_vector)

            # top 3
            _, top_three = torch.topk(torch.Tensor(probability_vector), 3)

            # compare against the ground truth
            truth = Y[step]

            # index top 1
            if choice == truth:
                prediction_accuracy[0] += 1

            # index top 3
            if truth in top_three:
                prediction_accuracy[1] += 1

            # move on to the next set of twenty
            step += 20

        # schedule finished
        print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20, ' top3: ', prediction_accuracy[1] / 20)
        print('schedule num:', j)
        percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
        percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
        prediction_accuracy = [0, 0]

    save_performance_results(percentage_accuracy_top1, percentage_accuracy_top3, 'pointwise_NN.pkl')
    return percentage_accuracy_top1
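
# Every evaluator above turns a length-20 score vector into a top-1 choice (most break ties uniformly
# at random) and a top-3 set, then checks both against the ground-truth task index. The helper below
# is a minimal, self-contained sketch of that bookkeeping; `top1_top3_sketch` and the random scores
# are illustrative only.
def top1_top3_sketch(scores, truth):
    """scores: length-20 array of per-task scores; truth: index of the task that was scheduled."""
    highest_val = max(scores)
    tied = [i for i, e in enumerate(scores) if e == highest_val]
    choice = np.random.choice(tied)  # break ties uniformly at random
    _, top_three = torch.topk(torch.Tensor(scores), 3)
    return int(choice == truth), int(truth in top_three)


# illustrative usage: one timestep's worth of random scores
top1_hit, top3_hit = top1_top3_sketch(np.random.rand(20), truth=3)
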