# NOTE: assumed imports. The scheduling helpers (Logger, ProLoNet, AlphaLoss,
# create_new_data, create_new_dataset, create_sets_of_20_from_x_for_pairwise_comparisions,
# find_which_schedule_this_belongs_to, load_in_embedding, store_embedding_back,
# save_pickle) live in project modules whose exact import paths are not shown here.
import ast
import glob
import pickle

import matplotlib.pyplot as plt
import numpy as np
import torch
from torch.autograd import Variable


class ProLoTrain:
    """
    Class structure to train the BDT with a certain alpha.
    This class handles training the BDT, evaluating the BDT, and saving.
    """

    def __init__(self, num_schedules):
        self.arguments = Logger()
        self.alpha = .9
        self.num_schedules = num_schedules
        self.home_dir = self.arguments.home_dir
        self.total_loss_array = []

        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
            self.num_schedules) + '_inf_hetero_deadline_pairwise.pkl'

        self.data = pickle.load(open(load_directory, "rb"))
        self.X, self.Y, self.schedule_array = create_new_data(self.num_schedules, self.data)
        self.start_of_each_set_twenty = create_sets_of_20_from_x_for_pairwise_comparisions(self.X)

        self.model = ProLoNet(input_dim=len(self.X[0]), weights=None, comparators=None, leaves=64,
                              output_dim=2, bayesian_embedding_dim=None, alpha=1.5,
                              use_gpu=True, vectorized=True, is_value=False)

        use_gpu = True
        if use_gpu:
            self.model = self.model.cuda()
        print(self.model.state_dict())
        params = list(self.model.parameters())
        self.opt = torch.optim.RMSprop(params)
        self.num_iterations_predict_task = 0
        self.total_iterations = 0
        self.convergence_epsilon = .01
        self.when_to_save = 1000
        self.distribution_epsilon = .0001

    def train(self):
        """
        Trains BDT.
        Randomly samples a schedule and timestep within that schedule,
        produces training data using x_i - x_j and trains upon that.
        :return:
        """
        training_done = False
        loss_func = AlphaLoss()

        # variables to keep track of loss and number of tasks trained over
        running_loss_predict_tasks = 0
        num_iterations_predict_task = 0
        while not training_done:
            # sample a timestep before the cutoff for cross-validation
            rand_timestep_within_sched = np.random.randint(len(self.start_of_each_set_twenty))
            set_of_twenty = self.start_of_each_set_twenty[rand_timestep_within_sched]
            truth = self.Y[set_of_twenty]

            # find feature vector of true action taken
            phi_i_num = truth + set_of_twenty
            phi_i = self.X[phi_i_num]
            phi_i_numpy = np.asarray(phi_i)

            # iterate over pairwise comparisons
            for counter in range(set_of_twenty, set_of_twenty + 20):
                if counter == phi_i_num:
                    continue
                else:
                    phi_j = self.X[counter]
                    phi_j_numpy = np.asarray(phi_j)
                    feature_input = phi_i_numpy - phi_j_numpy

                    if torch.cuda.is_available():
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                        P = Variable(torch.Tensor([1 - self.distribution_epsilon, self.distribution_epsilon]).cuda())
                    else:
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))
                        P = Variable(torch.Tensor([1 - self.distribution_epsilon, self.distribution_epsilon]))

                    output = self.model(feature_input)
                    loss = loss_func.forward(P, output, self.alpha)
                    if torch.isnan(loss):
                        print(self.alpha, ' :nan occurred at iteration ', self.total_iterations)

                    # prepare optimizer, compute gradient, update params
                    self.opt.zero_grad()
                    # skip updates for degenerate losses (vanishing or exploding)
                    if loss.item() < .001 or loss.item() > 50:
                        pass
                    else:
                        loss.backward()
                        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                        self.opt.step()

                    running_loss_predict_tasks += loss.item()
                    num_iterations_predict_task += 1

            for counter in range(set_of_twenty, set_of_twenty + 20):
                if counter == phi_i_num:
                    continue
                else:
                    phi_j = self.X[counter]
                    phi_j_numpy = np.asarray(phi_j)
                    feature_input = phi_j_numpy - phi_i_numpy

                    if torch.cuda.is_available():
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                        P = Variable(torch.Tensor([self.distribution_epsilon, 1 - self.distribution_epsilon]).cuda())
                    else:
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))
                        P = Variable(torch.Tensor([self.distribution_epsilon, 1 - self.distribution_epsilon]))

                    output = self.model(feature_input)
                    loss = loss_func.forward(P, output, self.alpha)

                    # prepare optimizer, compute gradient, update params
                    self.opt.zero_grad()
                    if loss.item() < .001 or loss.item() > 50:
                        pass
                    else:
                        loss.backward()
                        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                        self.opt.step()

                    running_loss_predict_tasks += loss.item()
                    num_iterations_predict_task += 1

            # add average loss to array
            self.total_loss_array.append(running_loss_predict_tasks / num_iterations_predict_task)
            num_iterations_predict_task = 0
            running_loss_predict_tasks = 0

            self.total_iterations += 1

            if self.total_iterations > 25 and self.total_iterations % 50 == 1:
                print('total iterations is', self.total_iterations)
                print('total loss (average for each 40, averaged)', np.mean(self.total_loss_array[-40:]))

            if self.total_iterations > 0 and self.total_iterations % self.when_to_save == self.when_to_save - 1:
                self.save_trained_nets('DDT' + str(self.num_schedules))

            if self.total_iterations > 2500 and np.mean(self.total_loss_array[-100:]) - np.mean(
                    self.total_loss_array[-500:]) < self.convergence_epsilon:
                training_done = True

    def evaluate_on_test_data(self, model, load_in_model=False):
        """
        Evaluate performance of a trained network tuned upon the alpha divergence loss.
        This is tested on 20% of the data and will be stored in a text file.
        Note: this function is called after training convergence.
        :return:
        """
        num_schedules = 75
        loss_func = AlphaLoss()
        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/test/' + str(
            num_schedules) + '_inf_hetero_deadline_pairwise.pkl'

        data = pickle.load(open(load_directory, "rb"))
        X, Y, schedule_array = create_new_data(num_schedules, data)
        start_of_each_set_twenty = create_sets_of_20_from_x_for_pairwise_comparisions(X)

        prediction_accuracy = [0, 0]
        percentage_accuracy_top1 = []
        percentage_accuracy_top3 = []

        if load_in_model:
            model.load_state_dict(torch.load(
                '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/pairwise_saved_models/model_homog.tar')['nn_state_dict'])

        for j in range(0, num_schedules):
            schedule_bounds = schedule_array[j]
            step = schedule_bounds[0]
            while step < schedule_bounds[1]:
                probability_matrix = np.zeros((20, 20))

                for m, counter in enumerate(range(step, step + 20)):
                    phi_i = X[counter]
                    phi_i_numpy = np.asarray(phi_i)
                    # for each set of twenty
                    for n, second_counter in enumerate(range(step, step + 20)):
                        # fill entire array with the diagonal set to zero
                        if second_counter == counter:  # same as m == n
                            continue
                        phi_j = X[second_counter]
                        phi_j_numpy = np.asarray(phi_j)
                        feature_input = phi_i_numpy - phi_j_numpy

                        if torch.cuda.is_available():
                            feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                        else:
                            feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))

                        # push through nets
                        preference_prob = model.forward(feature_input)
                        probability_matrix[m][n] = preference_prob[0].data.detach()[0].item()
                        probability_matrix[n][m] = preference_prob[0].data.detach()[1].item()

                # set of twenty is completed
                column_vec = np.sum(probability_matrix, axis=1)
                # top 1
                choice = np.argmax(column_vec)
                # top 3
                _, top_three = torch.topk(torch.Tensor(column_vec), 3)

                truth = Y[step]
                # index top 1
                if choice == truth:
                    prediction_accuracy[0] += 1
                # index top 3
                if truth in top_three:
                    prediction_accuracy[1] += 1

                step += 20

            # schedule finished
            print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20, ' top3: ', prediction_accuracy[1] / 20)
            print('schedule num:', j)
            percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
            percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
            prediction_accuracy = [0, 0]

        self.save_performance_results(percentage_accuracy_top1, percentage_accuracy_top3,
                                      'inf_DDT' + str(self.num_schedules))

    def save_trained_nets(self, name):
        """
        Saves the model.
        :return:
        """
        torch.save({'nn_state_dict': self.model.state_dict(), 'parameters': self.arguments},
                   '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/pairwise_saved_models/BNN_' + name + '.tar')

    def save_performance_results(self, top1, top3, special_string):
        """
        Saves performance of top1 and top3.
        :return:
        """
        print('top1_mean for ', self.alpha, ' is : ', np.mean(top1))
        data = {'top1_mean': np.mean(top1),
                'top3_mean': np.mean(top3),
                'top1_stderr': np.std(top1) / np.sqrt(len(top1)),
                'top3_stderr': np.std(top3) / np.sqrt(len(top3))}
        save_pickle(file=data,
                    file_location=self.home_dir + '/saved_models/pairwise_saved_models/',
                    special_string=special_string)
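# Minimal usage sketch (an assumed driver, not part of the original file): train the
# pairwise DDT above and score it on the held-out test pickle. The function name and
# schedule count are illustrative.
def run_pairwise_ddt(num_schedules=150):
    trainer = ProLoTrain(num_schedules)
    trainer.train()
    trainer.evaluate_on_test_data(trainer.model)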
class BDTTrain:
    """
    Class structure to train the BDT with a certain alpha.
    This class handles training the BDT, evaluating the BDT, and saving.
    """

    def __init__(self, bayesian_dim):
        self.arguments = Logger()
        self.alpha = .9
        self.num_schedules = 150
        self.home_dir = self.arguments.home_dir
        self.total_loss_array = []

        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
            self.num_schedules) + '_hetero_deadline_naive.pkl'

        self.bayesian_embedding_dim = int(bayesian_dim)
        self.data = pickle.load(open(load_directory, "rb"))
        self.X, self.Y, self.schedule_array = create_new_dataset(self.data, num_schedules=self.num_schedules)
        for i, each_element in enumerate(self.X):
            self.X[i] = each_element + list(range(20))

        use_gpu = True
        self.model = ProLoNet(input_dim=len(self.X[0]), weights=None, comparators=None, leaves=4,
                              output_dim=20, bayesian_embedding_dim=self.bayesian_embedding_dim,
                              alpha=1.5, use_gpu=use_gpu, vectorized=False, is_value=False)
        self.model_NLL = ProLoNet(input_dim=len(self.X[0]), weights=None, comparators=None, leaves=4,
                                  output_dim=20, bayesian_embedding_dim=self.bayesian_embedding_dim,
                                  alpha=1.5, use_gpu=use_gpu, vectorized=False, is_value=False)

        if use_gpu:
            self.model = self.model.cuda()
            self.model_NLL = self.model_NLL.cuda()
        print(self.model.state_dict())

        # exclude the embedding parameter from the main group; it gets its own learning rate
        params = list(self.model.parameters())
        del params[0]
        self.opt = torch.optim.RMSprop([{'params': params},
                                        {'params': self.model.bayesian_embedding, 'lr': .001}])

        params = list(self.model_NLL.parameters())
        del params[0]
        self.opt2 = torch.optim.RMSprop([{'params': params},
                                         {'params': self.model_NLL.bayesian_embedding, 'lr': .001}])

        self.num_iterations_predict_task = 0
        self.total_iterations = 0
        self.convergence_epsilon = .01
        self.when_to_save = 1000
        self.distribution_epsilon = .0001
        self.embedding_list = [torch.ones(self.bayesian_embedding_dim) * 1 / 3 for _ in range(self.num_schedules)]
        self.embedding_list_NLL = [torch.ones(self.bayesian_embedding_dim) * 1 / 3 for _ in range(self.num_schedules)]

    def train(self):
        """
        Trains BDT.
        Randomly samples a schedule and timestep within that schedule, and passes in
        the corresponding data in an attempt to classify which task was scheduled.
        :return:
        """
        criterion = torch.nn.CrossEntropyLoss()
        loss_func = AlphaLoss()
        training_done = False
        cv_cutoff = int(.8 * len(self.X))

        while not training_done:
            # sample a timestep before the cutoff for cross-validation
            rand_timestep_within_sched = np.random.randint(cv_cutoff)
            input_nn = self.X[rand_timestep_within_sched]
            truth_nn = self.Y[rand_timestep_within_sched]

            which_schedule = self.find_which_schedule_this_belongs_to(rand_timestep_within_sched)
            load_in_embedding(self.model, self.embedding_list, which_schedule)
            load_in_embedding(self.model_NLL, self.embedding_list_NLL, which_schedule)

            if torch.cuda.is_available():
                input_nn = Variable(torch.Tensor(np.asarray(input_nn).reshape(1, 242)).cuda())  # change to 5 to increase batch size
                P = Variable(torch.Tensor(np.ones((1, 20)))).cuda()
                P *= self.distribution_epsilon
                P[0][truth_nn] = 1 - 19 * self.distribution_epsilon
                truth = Variable(torch.Tensor(np.asarray(truth_nn).reshape(1)).cuda().long())
            else:
                input_nn = Variable(torch.Tensor(np.asarray(input_nn).reshape(1, 242)))
                P = Variable(torch.Tensor(np.ones((1, 20)) * self.distribution_epsilon))
                P[0][truth_nn] = 1 - 19 * self.distribution_epsilon
                truth = Variable(torch.Tensor(np.asarray(truth_nn).reshape(1)).long())

            self.opt.zero_grad()
            output = self.model.forward(input_nn)
            loss = loss_func.forward(P, output, self.alpha)
            loss.backward()
            self.opt.step()

            self.opt2.zero_grad()
            output_nn = self.model_NLL.forward(input_nn)
            loss_nn = criterion(output_nn, truth)
            loss_nn.backward()
            self.opt2.step()

            self.total_loss_array.append(loss.item())
            store_embedding_back(self.model, self.embedding_list, which_schedule)
            store_embedding_back(self.model_NLL, self.embedding_list_NLL, which_schedule)

            total_iterations = len(self.total_loss_array)
            if total_iterations % 50 == 49:
                print('loss at', total_iterations, 'is', loss.item())
                print('loss_NN at', total_iterations, 'is', loss_nn.item())
            if total_iterations > 100000:
                training_done = True

    def evaluate_alpha(self):
        """
        Evaluate performance of a trained network.
        This is tested on 20% of the data and will be stored in a text file.
        :return:
        """
        opt = torch.optim.RMSprop([{'params': self.model.bayesian_embedding, 'lr': .001}])
        loss_func = AlphaLoss()
        percentage_accuracy_top1 = []

        for i, schedule in enumerate(self.schedule_array):
            if i < .8 * len(self.schedule_array):
                continue
            load_in_embedding(self.model, self.embedding_list, i)
            prediction_accuracy = 0
            for count in range(schedule[0], schedule[1] + 1):
                input_nn = self.X[count]
                truth_nn = self.Y[count]

                if torch.cuda.is_available():
                    input_nn = Variable(torch.Tensor(np.asarray(input_nn).reshape(1, 242)).cuda())
                    P = Variable(torch.Tensor(np.ones((1, 20)))).cuda()
                    P *= self.distribution_epsilon
                    P[0][truth_nn] = 1 - 19 * self.distribution_epsilon
                    truth = Variable(torch.Tensor(np.asarray(truth_nn).reshape(1)).cuda().long())
                else:
                    input_nn = Variable(torch.Tensor(np.asarray(input_nn).reshape(1, 242)))
                    P = Variable(torch.Tensor(np.ones((1, 20)) * self.distribution_epsilon))
                    P[0][truth_nn] = 1 - 19 * self.distribution_epsilon
                    truth = Variable(torch.Tensor(np.asarray(truth_nn).reshape(1)).long())

                # tune the embedding on this schedule while evaluating
                opt.zero_grad()
                output = self.model.forward(input_nn)
                loss = loss_func.forward(P, output, self.alpha)
                loss.backward()
                opt.step()

                index = torch.argmax(output).item()
                if index == truth.item():
                    prediction_accuracy += 1

            print('Prediction Accuracy: top1: ', prediction_accuracy / 20)
            store_embedding_back(self.model, self.embedding_list, i)
            print('schedule num:', i)
            percentage_accuracy_top1.append(prediction_accuracy / 20)

        self.save_performance_results(percentage_accuracy_top1)
        print(np.mean(percentage_accuracy_top1))

    def evaluate_other(self):
        """
        Evaluate performance of a trained network.
        This is tested on 20% of the data and will be stored in a text file.
        :return:
        """
        opt = torch.optim.RMSprop([{'params': self.model_NLL.bayesian_embedding, 'lr': .001}])
        criterion = torch.nn.CrossEntropyLoss()
        percentage_accuracy_top1 = []

        for i, schedule in enumerate(self.schedule_array):
            if i < .8 * len(self.schedule_array):
                continue
            load_in_embedding(self.model_NLL, self.embedding_list_NLL, i)
            prediction_accuracy = 0
            for count in range(schedule[0], schedule[1] + 1):
                input_nn = self.X[count]
                truth_nn = self.Y[count]

                if torch.cuda.is_available():
                    input_nn = Variable(torch.Tensor(np.asarray(input_nn).reshape(1, 242)).cuda())
                    truth = Variable(torch.Tensor(np.asarray(truth_nn).reshape(1)).cuda().long())
                else:
                    input_nn = Variable(torch.Tensor(np.asarray(input_nn).reshape(1, 242)))
                    truth = Variable(torch.Tensor(np.asarray(truth_nn).reshape(1)).long())  # CrossEntropyLoss needs long targets

                opt.zero_grad()
                output_nn = self.model_NLL.forward(input_nn)
                loss_nn = criterion(output_nn, truth)
                loss_nn.backward()
                opt.step()

                index = torch.argmax(output_nn).item()
                if index == truth.item():
                    prediction_accuracy += 1

            print('Prediction Accuracy: top1: ', prediction_accuracy / 20)
            store_embedding_back(self.model_NLL, self.embedding_list_NLL, i)
            print('schedule num:', i)
            percentage_accuracy_top1.append(prediction_accuracy / 20)

        self.save_performance_results(percentage_accuracy_top1)
        print(np.mean(percentage_accuracy_top1))

    def save_performance_results(self, top1):
        """
        Saves performance of top1.
        :return:
        """
        print('top1_mean for ', self.alpha, ' is : ', np.mean(top1))
        data = {'top1_mean': np.mean(top1),
                'top1_stderr': np.std(top1) / np.sqrt(len(top1)),
                'embedding': self.bayesian_embedding_dim}
        save_pickle(file=data,
                    file_location=self.home_dir + '/saved_models/naive_saved_models/',
                    special_string=str(self.bayesian_embedding_dim) + 'baydim_naivetest.pkl')

    def find_which_schedule_this_belongs_to(self, sample_val):
        """
        Takes a sample and determines which schedule it belongs to.
        Note: a schedule is task * task sized.
        :param sample_val: an int
        :return: schedule num
        """
        for i, each_array in enumerate(self.schedule_array):
            if each_array[0] <= sample_val <= each_array[1]:
                return i
            else:
                continue
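# load_in_embedding / store_embedding_back are imported project helpers and are not
# defined in this file. A minimal sketch of the assumed contract, using the
# model.bayesian_embedding attribute the optimizers above already reference: copy a
# per-schedule embedding into the network before touching that schedule, and copy the
# tuned values back out afterwards. The _sketch names are hypothetical, not the
# project's actual implementation.
def load_in_embedding_sketch(model, embedding_list, schedule_index):
    # overwrite the network's Bayesian embedding with this schedule's vector
    model.bayesian_embedding.data.copy_(embedding_list[schedule_index])


def store_embedding_back_sketch(model, embedding_list, schedule_index):
    # persist the tuned embedding so the next visit to this schedule resumes from it
    embedding_list[schedule_index] = model.bayesian_embedding.detach().cpu().clone()
    return embedding_list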
class BDTTrain:
    """
    Class structure to train the BDT with a certain embedding.
    This class handles training the BDT, evaluating the BDT, and saving.
    """

    def __init__(self, bayesian_dim):
        self.arguments = Logger()
        self.alpha = .9
        self.num_schedules = 200  # test on 40, train on 160
        self.home_dir = self.arguments.home_dir
        self.total_loss_array = []

        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
            self.num_schedules) + '_BDFIL_hetero_deadline_pairwise.pkl'

        self.bayesian_embedding_dim = int(bayesian_dim)
        self.data = pickle.load(open(load_directory, "rb"))
        self.X, self.Y, self.schedule_array = create_new_data(self.num_schedules, self.data)
        self.start_of_each_set_twenty = create_sets_of_20_from_x_for_pairwise_comparisions(self.X)
        self.embedding_list = [torch.ones(self.bayesian_embedding_dim) * 1 / 3 for _ in range(self.num_schedules)]

        use_gpu = True
        self.model = ProLoNet(input_dim=len(self.X[0]), weights=None, comparators=None, leaves=16,
                              output_dim=2, bayesian_embedding_dim=self.bayesian_embedding_dim,
                              alpha=1.5, use_gpu=use_gpu, vectorized=True, is_value=False).cuda()
        if use_gpu:
            self.model = self.model.cuda()
        print(self.model.state_dict())

        params = list(self.model.parameters())
        # delete embedding parameter
        del params[0]
        self.opt = torch.optim.RMSprop(params, lr=.0001)
        # optimizer for the embedding
        self.embedding_optimizer = torch.optim.Adam([{'params': self.model.bayesian_embedding, 'lr': .01}])

        self.total_iterations = 0
        self.convergence_epsilon = .01
        self.when_to_save = 2000
        self.distribution_epsilon = .0001

    def train(self):
        """
        Trains BDT.
        Randomly samples a schedule and timestep within that schedule,
        produces training data using x_i - x_j and trains upon that.
        :return:
        """
        training_done = False
        cv_cutoff = int(.8 * len(self.start_of_each_set_twenty))
        loss_func = AlphaLoss()

        # variables to keep track of loss and number of tasks trained over
        running_loss_predict_tasks = 0
        num_iterations_predict_task = 0
        while not training_done:
            # sample a timestep before the cutoff for cross-validation
            rand_timestep_within_sched = np.random.randint(cv_cutoff)
            set_of_twenty = self.start_of_each_set_twenty[rand_timestep_within_sched]
            # truth is the same as the task chosen
            # NOTE: if initial truth > 10, it is biggest task first; else it is smallest task
            # first. Approximate way to keep track of the current type of schedule.
            truth = self.Y[set_of_twenty]

            # get the current schedule that the timestep is from
            which_schedule = find_which_schedule_this_belongs_to(self.schedule_array, set_of_twenty)
            # load in the embedding from the list into the network
            load_in_embedding(self.model, self.embedding_list, which_schedule)

            # find feature vector of true action taken
            phi_i_num = truth + set_of_twenty
            phi_i = self.X[phi_i_num]
            phi_i_numpy = np.asarray(phi_i)

            # iterate over pairwise comparisons
            for counter in range(set_of_twenty, set_of_twenty + 20):
                if counter == phi_i_num:
                    continue
                else:
                    phi_j = self.X[counter]
                    phi_j_numpy = np.asarray(phi_j)
                    feature_input = phi_i_numpy - phi_j_numpy

                    # transform into torch variables
                    if torch.cuda.is_available():
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                        P = Variable(torch.Tensor([1 - self.distribution_epsilon, self.distribution_epsilon]).cuda())
                    else:
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))
                        P = Variable(torch.Tensor([1 - self.distribution_epsilon, self.distribution_epsilon]))

                    # get model output
                    output = self.model(feature_input)
                    loss = loss_func.forward(P, output, self.alpha)

                    # also update the embedding after 5000 passes through the data
                    if self.total_iterations > 5000:
                        self.opt.zero_grad()
                        self.embedding_optimizer.zero_grad()
                        loss.backward()
                        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                        self.opt.step()
                        self.embedding_optimizer.step()
                    else:
                        self.opt.zero_grad()
                        loss.backward()
                        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                        self.opt.step()

                    running_loss_predict_tasks += loss.item()
                    num_iterations_predict_task += 1

            for counter in range(set_of_twenty, set_of_twenty + 20):
                if counter == phi_i_num:
                    continue
                else:
                    phi_j = self.X[counter]
                    phi_j_numpy = np.asarray(phi_j)
                    feature_input = phi_j_numpy - phi_i_numpy

                    if torch.cuda.is_available():
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                        P = Variable(torch.Tensor([self.distribution_epsilon, 1 - self.distribution_epsilon]).cuda())
                    else:
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))
                        P = Variable(torch.Tensor([self.distribution_epsilon, 1 - self.distribution_epsilon]))

                    output = self.model(feature_input)
                    loss = loss_func.forward(P, output, self.alpha)

                    # also update the embedding after 5000 passes through the data
                    if self.total_iterations > 5000:
                        self.opt.zero_grad()
                        self.embedding_optimizer.zero_grad()
                        loss.backward()
                        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                        self.opt.step()
                        self.embedding_optimizer.step()
                    else:
                        self.opt.zero_grad()
                        loss.backward()
                        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                        self.opt.step()

                    running_loss_predict_tasks += loss.item()
                    num_iterations_predict_task += 1

            # save the embedding back into the list
            self.embedding_list = store_embedding_back(self.model, self.embedding_list, which_schedule)

            # add average loss across pairwise comparisons to the array
            self.total_loss_array.append(running_loss_predict_tasks / num_iterations_predict_task)
            num_iterations_predict_task = 0
            running_loss_predict_tasks = 0

            self.total_iterations += 1

            if self.total_iterations > 25 and self.total_iterations % 50 == 1:
                print('total iterations is', self.total_iterations)
                print('total loss (average for each 40, averaged)', np.mean(self.total_loss_array[-40:]))

            if self.total_iterations > 0 and self.total_iterations % self.when_to_save == self.when_to_save - 1:
                print(self.embedding_list)
                self.save_trained_nets()

            if self.total_iterations > 8000 and np.mean(self.total_loss_array[-100:]) - np.mean(
                    self.total_loss_array[-500:]) < self.convergence_epsilon:
                training_done = True

    def evaluate(self, load_in_model=False):
        """
        Evaluate performance of a trained network tuned upon the alpha divergence loss.
        This is tested on 20% of the data and will be stored in a text file.
        Note: this function is called after training convergence.
        :return:
        """
        loss_func = AlphaLoss()
        checkpoint = self.model.state_dict().copy()
        embedding_list_copy = self.embedding_list.copy()
        prediction_accuracy = [0, 0]
        percentage_accuracy_top1 = []
        percentage_accuracy_top3 = []

        if load_in_model:
            self.model.load_state_dict(torch.load(
                '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/pairwise_saved_models/model_baydimtest_'
                + str(self.bayesian_embedding_dim) + '.tar')['nn_state_dict'])

        # step through the cv set
        for j in range(int(self.num_schedules * .8), self.num_schedules):
            # only the embedding is optimized at test time; note this also rebinds self.opt
            optimizer_for_embedding = self.opt = torch.optim.SGD([{'params': self.model.bayesian_embedding, 'lr': .9}])
            load_in_embedding(self.model, self.embedding_list, j)
            schedule_bounds = self.schedule_array[j]
            step = schedule_bounds[0]  # starting index of schedule within the data
            # until the schedule is completed
            while step < schedule_bounds[1]:
                probability_matrix = np.zeros((20, 20))

                for m, counter in enumerate(range(step, step + 20)):
                    phi_i = self.X[counter]
                    phi_i_numpy = np.asarray(phi_i)
                    # for each set of twenty
                    for n, second_counter in enumerate(range(step, step + 20)):
                        # fill entire array with the diagonal set to zero
                        if second_counter == counter:  # same as m == n
                            continue
                        phi_j = self.X[second_counter]
                        phi_j_numpy = np.asarray(phi_j)
                        feature_input = phi_i_numpy - phi_j_numpy

                        if torch.cuda.is_available():
                            feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                        else:
                            feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))

                        # push through nets
                        preference_prob = self.model.forward(feature_input)
                        probability_matrix[m][n] = preference_prob[0].data.detach()[0].item()
                        probability_matrix[n][m] = preference_prob[0].data.detach()[1].item()

                # set of twenty is completed
                column_vec = np.sum(probability_matrix, axis=1)
                # top 1
                choice = np.argmax(column_vec)
                # top 3
                _, top_three = torch.topk(torch.Tensor(column_vec), 3)

                truth = self.Y[step]
                # index top 1
                if choice == truth:
                    prediction_accuracy[0] += 1
                # index top 3
                if truth in top_three:
                    prediction_accuracy[1] += 1

                # forward
                phi_i_num = truth + step  # old method: set_of_twenty[0] + truth
                phi_i = self.X[phi_i_num]
                phi_i_numpy = np.asarray(phi_i)

                # fine-tune the embedding on this timestep only when the prediction was wrong
                if choice == truth:
                    pass
                else:
                    # iterate over pairwise comparisons
                    for counter in range(step, step + 20):
                        if counter == phi_i_num:
                            continue
                        else:
                            phi_j = self.X[counter]
                            phi_j_numpy = np.asarray(phi_j)
                            feature_input = phi_i_numpy - phi_j_numpy

                            if torch.cuda.is_available():
                                feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                                P = Variable(torch.Tensor([1 - self.distribution_epsilon, self.distribution_epsilon]).cuda())
                            else:
                                feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))
                                P = Variable(torch.Tensor([1 - self.distribution_epsilon, self.distribution_epsilon]))

                            output = self.model(feature_input)
                            loss = loss_func.forward(P, output, self.alpha)

                            # update till convergence, or at most 50 iterations
                            if loss.item() > .005:
                                flag = False
                                tracker = 0
                                while not flag:
                                    optimizer_for_embedding.zero_grad()
                                    loss.backward()
                                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                                    optimizer_for_embedding.step()
                                    output = self.model(feature_input)
                                    loss = loss_func.forward(P, output, self.alpha)
                                    tracker += 1
                                    if tracker > 50:
                                        flag = True
                                    if loss.item() < .005:
                                        flag = True
                            else:
                                pass

                    for counter in range(step, step + 20):
                        if counter == phi_i_num:
                            continue
                        else:
                            phi_j = self.X[counter]
                            phi_j_numpy = np.asarray(phi_j)
                            feature_input = phi_j_numpy - phi_i_numpy

                            if torch.cuda.is_available():
                                feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)).cuda())
                                P = Variable(torch.Tensor([self.distribution_epsilon, 1 - self.distribution_epsilon]).cuda())
                            else:
                                feature_input = Variable(torch.Tensor(feature_input.reshape(1, 13)))
                                P = Variable(torch.Tensor([self.distribution_epsilon, 1 - self.distribution_epsilon]))

                            output = self.model(feature_input)
                            loss = loss_func.forward(P, output, self.alpha)

                            # update till convergence, or at most 50 iterations
                            if loss.item() > .005:
                                flag = False
                                tracker = 0
                                while not flag:
                                    optimizer_for_embedding.zero_grad()
                                    loss.backward()
                                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                                    optimizer_for_embedding.step()
                                    output = self.model(feature_input)
                                    loss = loss_func.forward(P, output, self.alpha)
                                    tracker += 1
                                    if tracker > 50:
                                        flag = True
                                    if loss.item() < .005:
                                        flag = True
                            else:
                                pass

                self.embedding_list = store_embedding_back(self.model, self.embedding_list, j)
                step += 20

            # schedule finished
            print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20, ' top3: ', prediction_accuracy[1] / 20)
            print('schedule num:', j)
            percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
            percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
            prediction_accuracy = [0, 0]

        self.save_performance_results(percentage_accuracy_top1, percentage_accuracy_top3)
        self.model.load_state_dict(checkpoint)
        self.embedding_list = embedding_list_copy

    def save_trained_nets(self):
        """
        Saves the model.
        :return:
        """
        torch.save({'nn_state_dict': self.model.state_dict(), 'parameters': self.arguments},
                   '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/pairwise_saved_models/model_baydimtest_'
                   + str(self.bayesian_embedding_dim) + '.tar')

    def save_performance_results(self, top1, top3):
        """
        Saves performance of top1 and top3.
        :return:
        """
        print('top1_mean for ', self.alpha, ' is : ', np.mean(top1))
        data = {'top1_mean': np.mean(top1),
                'top3_mean': np.mean(top3),
                'top1_stderr': np.std(top1) / np.sqrt(len(top1)),
                'top3_stderr': np.std(top3) / np.sqrt(len(top3)),
                'alpha': self.alpha}
        save_pickle(file=data,
                    file_location=self.home_dir + '/saved_models/pairwise_saved_models/',
                    special_string=str(self.bayesian_embedding_dim) + 'baydimtest.pkl')

    def find_optimal_embedding(self):
        """
        Searches through all .pkl files and returns the name of the file with the highest top 1 accuracy.
        :return:
        """
        performance_files = glob.glob(self.home_dir + '/saved_models/pairwise_saved_models/*test.pkl')
        print(performance_files)
        for i, file in enumerate(performance_files):
            data: dict = pickle.load(open(file, 'rb'))
            print(file, ': ', data['top1_mean'])
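# AlphaLoss is imported from the project and is not defined in this file; the trainers
# only require a divergence between the target distribution P and the network output
# that is differentiable in the output. A minimal sketch under the assumption that it
# implements Amari's alpha-divergence (the project's actual implementation may differ):
class AlphaLossSketch:
    @staticmethod
    def forward(P, Q, alpha):
        # D_alpha(P || Q) = (1 - sum_i P_i^alpha * Q_i^(1 - alpha)) / (alpha * (1 - alpha)),
        # defined for alpha not in {0, 1}; eps keeps pow() finite when Q has zeros
        eps = 1e-12
        return (1 - torch.sum(P.pow(alpha) * (Q + eps).pow(1 - alpha))) / (alpha * (1 - alpha))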
class BDTTrain:
    """
    Class structure to train the BDT with a certain alpha.
    This class handles training the BDT, evaluating the BDT, and saving.
    """

    def __init__(self):
        self.arguments = Logger()
        self.alpha = .9
        self.num_schedules = 150
        self.home_dir = self.arguments.home_dir
        self.total_loss_array = []

        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
            self.num_schedules) + '_inf_hetero_deadline_naive.pkl'

        self.data = pickle.load(open(load_directory, "rb"))
        self.X, self.Y, self.schedule_array = create_new_dataset(self.data, num_schedules=self.num_schedules)
        for i, each_element in enumerate(self.X):
            self.X[i] = each_element + list(range(20))

        self.model = ProLoNet(input_dim=len(self.X[0]), weights=None, comparators=None, leaves=256,
                              output_dim=20, bayesian_embedding_dim=None, alpha=1.5,
                              use_gpu=True, vectorized=False, is_value=False)

        use_gpu = True
        if use_gpu:
            self.model = self.model.cuda()
        print(self.model.state_dict())
        params = list(self.model.parameters())
        self.opt = torch.optim.RMSprop([{'params': params}])
        self.num_iterations_predict_task = 0
        self.total_iterations = 0
        self.convergence_epsilon = .01
        self.when_to_save = 1000
        self.distribution_epsilon = .0001

    def train(self):
        """
        Trains BDT.
        Randomly samples a schedule and timestep within that schedule, and passes in
        the corresponding data in an attempt to classify which task was scheduled.
        :return:
        """
        # NLLLoss expects log-probabilities; this assumes the ProLoNet output is in log space
        criterion = torch.nn.NLLLoss()
        training_done = False
        cv_cutoff = int(.8 * len(self.X))

        while not training_done:
            # sample a timestep before the cutoff for cross-validation
            rand_timestep_within_sched = np.random.randint(cv_cutoff)
            input_nn = self.X[rand_timestep_within_sched]
            truth_nn = self.Y[rand_timestep_within_sched]

            if torch.cuda.is_available():
                input_nn = Variable(torch.Tensor(np.asarray(input_nn).reshape(1, 242)).cuda())  # change to 5 to increase batch size
                truth_nn = Variable(torch.Tensor(np.asarray(truth_nn).reshape(1)).cuda().long())
            else:
                input_nn = Variable(torch.Tensor(np.asarray(input_nn).reshape(1, 242)))
                truth_nn = Variable(torch.Tensor(np.asarray(truth_nn).reshape(1)).long())  # NLLLoss needs long targets

            self.opt.zero_grad()
            output_nn = self.model.forward(input_nn)
            loss_nn = criterion(output_nn, truth_nn)
            loss_nn.backward()
            self.opt.step()
            self.total_loss_array.append(loss_nn.item())

            total_iterations = len(self.total_loss_array)
            if total_iterations % 50 == 49:
                print('loss at', total_iterations, 'is', loss_nn.item())
            if total_iterations > 15000:
                training_done = True

    def evaluate(self, load_in_model=False):
        """
        Evaluate performance of a trained network.
        This is tested on 20% of the data and will be stored in a text file.
        :return:
        """
        percentage_accuracy_top1 = []
        for i, schedule in enumerate(self.schedule_array):
            if i < .8 * len(self.schedule_array):
                continue
            prediction_accuracy = 0
            for count in range(schedule[0], schedule[1] + 1):
                net_input = self.X[count]
                truth = self.Y[count]

                if torch.cuda.is_available():
                    input_nn = Variable(torch.Tensor(np.asarray(net_input).reshape(1, 242)).cuda())
                    truth = Variable(torch.Tensor(np.asarray(truth).reshape(1)).cuda().long())
                else:
                    input_nn = Variable(torch.Tensor(np.asarray(net_input).reshape(1, 242)))
                    truth = Variable(torch.Tensor(np.asarray(truth).reshape(1)))

                ##### forward #####
                output = self.model.forward(input_nn)
                index = torch.argmax(output).item()
                if index == truth.item():
                    prediction_accuracy += 1

            print('Prediction Accuracy: top1: ', prediction_accuracy / 20)
            print('schedule num:', i)
            percentage_accuracy_top1.append(prediction_accuracy / 20)

        print(np.mean(percentage_accuracy_top1))
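# A small self-contained helper equivalent to the top-1 bookkeeping in the evaluate()
# methods above (the name is illustrative): argmax over the 20-way output and compare
# against the ground-truth task index.
def top1_correct(output, truth_index):
    """Return 1 if the highest-scoring task matches the ground truth, else 0."""
    return int(torch.argmax(output).item() == truth_index)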
class BDTTrain:
    """
    Class structure to train the BDT with a certain alpha.
    This class handles training the BDT, evaluating the BDT, and saving.
    """

    def __init__(self, alpha):
        self.arguments = Logger()
        self.alpha = alpha
        self.num_schedules = 150
        self.home_dir = self.arguments.home_dir
        self.total_loss_array = []

        load_directory = '/home/ghost/PycharmProjects/scheduling_environment/new_data_pickle/' + str(
            self.num_schedules) + 'pairwise.pkl'

        self.X = None
        self.Y = None
        self.schedule_array = None
        bayesian_embedding_dim = 14
        self.data = pickle.load(open(load_directory, "rb"))
        self.X, self.Y, self.schedule_array = create_new_data(self.num_schedules, self.data)
        self.start_of_each_set_twenty = self.create_sets_of_20_from_x_for_pairwise_comparisions()
        self.embedding_list = [torch.ones(bayesian_embedding_dim) * 1 / 3 for _ in range(self.num_schedules)]

        self.model = ProLoNet(input_dim=len(self.X[0]), weights=None, comparators=None, leaves=8,
                              output_dim=2, bayesian_embedding_dim=bayesian_embedding_dim,
                              alpha=1.5, use_gpu=True, vectorized=True, is_value=False).cuda()

        use_gpu = True
        if use_gpu:
            self.model = self.model.cuda()
        print(self.model.state_dict())
        params = list(self.model.parameters())
        del params[0]
        self.opt = torch.optim.RMSprop([{'params': params}, {'params': self.model.bayesian_embedding, 'lr': .001}])
        self.num_iterations_predict_task = 0
        self.total_iterations = 0
        self.convergence_epsilon = .01
        self.when_to_save = 1000
        self.distribution_epsilon = .0001

    def create_sets_of_20_from_x_for_pairwise_comparisions(self):
        """
        Create sets of 20 to denote each timestep for all schedules.
        :return: range(0, length_of_X, 20)
        """
        length_of_X = len(self.X)
        return list(range(0, length_of_X, 20))

    def find_which_schedule_this_belongs_to(self, sample_val):
        """
        Takes a sample and determines which schedule it belongs to.
        Note: a schedule is task * task sized.
        :param sample_val: an int
        :return: schedule num
        """
        for i, each_array in enumerate(self.schedule_array):
            if each_array[0] <= sample_val <= each_array[1]:
                return i
            else:
                continue

    def train(self):
        """
        Trains BDT.
        Randomly samples a schedule and timestep within that schedule,
        produces training data using x_i - x_j and trains upon that.
        :return:
        """
        training_done = False
        cv_cutoff = int(.8 * len(self.start_of_each_set_twenty))
        loss_func = AlphaLoss()

        # variables to keep track of loss and number of tasks trained over
        running_loss_predict_tasks = 0
        num_iterations_predict_task = 0
        while not training_done:
            # sample a timestep before the cutoff for cross-validation
            rand_timestep_within_sched = np.random.randint(cv_cutoff)
            set_of_twenty = self.start_of_each_set_twenty[rand_timestep_within_sched]
            truth = self.Y[set_of_twenty]
            which_schedule = self.find_which_schedule_this_belongs_to(set_of_twenty)
            load_in_embedding(self.model, self.embedding_list, which_schedule)

            # find feature vector of true action taken
            phi_i_num = truth + set_of_twenty
            phi_i = self.X[phi_i_num]
            phi_i_numpy = np.asarray(phi_i)

            # iterate over pairwise comparisons
            for counter in range(set_of_twenty, set_of_twenty + 20):
                if counter == phi_i_num:
                    continue
                else:
                    phi_j = self.X[counter]
                    phi_j_numpy = np.asarray(phi_j)
                    feature_input = phi_i_numpy - phi_j_numpy

                    if torch.cuda.is_available():
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 12)).cuda())
                        P = Variable(torch.Tensor([1 - self.distribution_epsilon, self.distribution_epsilon]).cuda())
                    else:
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 12)))
                        P = Variable(torch.Tensor([1 - self.distribution_epsilon, self.distribution_epsilon]))

                    output = self.model(feature_input)
                    loss = loss_func.forward(P, output, self.alpha)
                    if torch.isnan(loss):
                        print(self.alpha, ' :nan occurred at iteration ', self.total_iterations)
                        return  # TODO: can prob just set training to true here if network still outputs proper stuff

                    # prepare optimizer, compute gradient, update params
                    self.opt.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                    self.opt.step()

                    running_loss_predict_tasks += loss.item()
                    print('after ', num_iterations_predict_task, ' the embedding has changed to :',
                          self.model.state_dict()['bayesian_embedding'])
                    num_iterations_predict_task += 1

            for counter in range(set_of_twenty, set_of_twenty + 20):
                if counter == phi_i_num:
                    continue
                else:
                    phi_j = self.X[counter]
                    phi_j_numpy = np.asarray(phi_j)
                    feature_input = phi_j_numpy - phi_i_numpy

                    if torch.cuda.is_available():
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 12)).cuda())
                        P = Variable(torch.Tensor([self.distribution_epsilon, 1 - self.distribution_epsilon]).cuda())
                    else:
                        feature_input = Variable(torch.Tensor(feature_input.reshape(1, 12)))
                        P = Variable(torch.Tensor([self.distribution_epsilon, 1 - self.distribution_epsilon]))

                    output = self.model(feature_input)
                    loss = loss_func.forward(P, output, self.alpha)

                    # prepare optimizer, compute gradient (clipped), update params
                    self.opt.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                    self.opt.step()
                    print('after ', num_iterations_predict_task, ' the embedding has changed to :',
                          self.model.state_dict()['bayesian_embedding'])

                    running_loss_predict_tasks += loss.item()
                    num_iterations_predict_task += 1

            # add average loss to array
            store_embedding_back(self.model, self.embedding_list, which_schedule)
            self.total_loss_array.append(running_loss_predict_tasks / num_iterations_predict_task)
            num_iterations_predict_task = 0
            running_loss_predict_tasks = 0

            self.total_iterations += 1

            if self.total_iterations > 25 and self.total_iterations % 50 == 1:
                print('total iterations is', self.total_iterations)
                print('total loss (average for each 40, averaged)', np.mean(self.total_loss_array[-40:]))
                # TODO: change running loss to actual loss

            if self.total_iterations > 0 and self.total_iterations % self.when_to_save == self.when_to_save - 1:
                print(self.embedding_list)
                self.save_trained_nets()

            if self.total_iterations > 10000 and np.mean(self.total_loss_array[-100:]) - np.mean(
                    self.total_loss_array[-500:]) < self.convergence_epsilon:
                training_done = True

    def evaluate(self, load_in_model=False):
        # TODO: can be changed to one batched forward pass
        """
        Evaluate performance of a trained network tuned upon the alpha divergence loss.
        This is tested on 20% of the data and will be stored in a text file.
        Note: this function is called after training convergence.
        :return:
        """
        loss_func = AlphaLoss()
        checkpoint = self.model.state_dict().copy()
        # only the embedding is tuned at test time; rebinding self.opt here means the
        # second comparison loop below also updates just the embedding
        optimizer_for_embedding = self.opt = torch.optim.RMSprop([{'params': self.model.bayesian_embedding, 'lr': .001}])
        prediction_accuracy = [0, 0]
        percentage_accuracy_top1 = []
        percentage_accuracy_top3 = []

        if load_in_model:
            self.model.load_state_dict(torch.load('/home/ghost/PycharmProjects/bayesian_prolo/model.tar')['nn_state_dict'])

        # for the rest of the schedules (the held-out 20%)
        for j in range(int(self.num_schedules * .8), self.num_schedules):
            load_in_embedding(self.model, self.embedding_list, j)
            schedule_bounds = self.schedule_array[j]
            step = schedule_bounds[0]
            while step < schedule_bounds[1]:
                probability_matrix = np.zeros((20, 20))

                for m, counter in enumerate(range(step, step + 20)):
                    phi_i = self.X[counter]
                    phi_i_numpy = np.asarray(phi_i)
                    # for each set of twenty
                    for n, second_counter in enumerate(range(step, step + 20)):
                        # fill entire array with the diagonal set to zero
                        if second_counter == counter:  # same as m == n
                            continue
                        phi_j = self.X[second_counter]
                        phi_j_numpy = np.asarray(phi_j)
                        feature_input = phi_i_numpy - phi_j_numpy

                        if torch.cuda.is_available():
                            feature_input = Variable(torch.Tensor(feature_input.reshape(1, 12)).cuda())
                        else:
                            feature_input = Variable(torch.Tensor(feature_input.reshape(1, 12)))

                        # push through nets
                        preference_prob = self.model.forward(feature_input)
                        probability_matrix[m][n] = preference_prob[0].data.detach()[0].item()
                        probability_matrix[n][m] = preference_prob[0].data.detach()[1].item()

                # set of twenty is completed
                column_vec = np.sum(probability_matrix, axis=1)
                # top 1
                choice = np.argmax(column_vec)
                # top 3
                _, top_three = torch.topk(torch.Tensor(column_vec), 3)

                # then do the training update loop
                truth = self.Y[step]
                # index top 1
                if choice == truth:
                    prediction_accuracy[0] += 1
                # index top 3
                if truth in top_three:
                    prediction_accuracy[1] += 1

                # forward
                phi_i_num = truth + step  # old method: set_of_twenty[0] + truth
                phi_i = self.X[phi_i_num]
                phi_i_numpy = np.asarray(phi_i)

                # iterate over pairwise comparisons
                for counter in range(step, step + 20):
                    if counter == phi_i_num:
                        continue
                    else:
                        phi_j = self.X[counter]
                        phi_j_numpy = np.asarray(phi_j)
                        feature_input = phi_i_numpy - phi_j_numpy

                        if torch.cuda.is_available():
                            feature_input = Variable(torch.Tensor(feature_input.reshape(1, 12)).cuda())
                            P = Variable(torch.Tensor([1 - self.distribution_epsilon, self.distribution_epsilon]).cuda())
                        else:
                            feature_input = Variable(torch.Tensor(feature_input.reshape(1, 12)))
                            P = Variable(torch.Tensor([1 - self.distribution_epsilon, self.distribution_epsilon]))

                        output = self.model(feature_input)
                        loss = loss_func.forward(P, output, self.alpha)

                        # prepare optimizer, compute gradient, update params
                        optimizer_for_embedding.zero_grad()
                        loss.backward()
                        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                        optimizer_for_embedding.step()

                for counter in range(step, step + 20):
                    if counter == phi_i_num:
                        continue
                    else:
                        phi_j = self.X[counter]
                        phi_j_numpy = np.asarray(phi_j)
                        feature_input = phi_j_numpy - phi_i_numpy

                        if torch.cuda.is_available():
                            feature_input = Variable(torch.Tensor(feature_input.reshape(1, 12)).cuda())
                            P = Variable(torch.Tensor([self.distribution_epsilon, 1 - self.distribution_epsilon]).cuda())
                        else:
                            feature_input = Variable(torch.Tensor(feature_input.reshape(1, 12)))
                            P = Variable(torch.Tensor([self.distribution_epsilon, 1 - self.distribution_epsilon]))

                        output = self.model(feature_input)
                        loss = loss_func.forward(P, output, self.alpha)

                        # prepare optimizer, compute gradient (clipped), update params
                        self.opt.zero_grad()
                        loss.backward()
                        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                        self.opt.step()

                store_embedding_back(self.model, self.embedding_list, j)
                print(self.model.state_dict()['bayesian_embedding'])
                step += 20

            # schedule finished
            print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20, ' top3: ', prediction_accuracy[1] / 20)
            print('schedule num:', j)
            percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
            percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
            prediction_accuracy = [0, 0]

        self.save_performance_results(percentage_accuracy_top1, percentage_accuracy_top3)
        self.model.load_state_dict(checkpoint)

    def save_trained_nets(self):
        """
        Saves the model.
        :return:
        """
        torch.save({'nn_state_dict': self.model.state_dict(), 'parameters': self.arguments},
                   '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/pairwise_saved_models/model'
                   + str(self.alpha) + '.tar')

    def save_performance_results(self, top1, top3):
        """
        Saves performance of top1 and top3.
        :return:
        """
        print('top1_mean for ', self.alpha, ' is : ', np.mean(top1))
        data = {'top1_mean': np.mean(top1),
                'top3_mean': np.mean(top3),
                'top1_stderr': np.std(top1) / np.sqrt(len(top1)),
                'top3_stderr': np.std(top3) / np.sqrt(len(top3)),
                'alpha': self.alpha}
        save_pickle(file=data,
                    file_location=self.home_dir + '/saved_models/pairwise_saved_models/',
                    special_string=str(self.alpha) + 'alpha.pkl')
        if np.mean(top1) > .6:
            exit()

    # TODO: function that searches through all the saved means and stderrs, plots, and finds the highest
    def find_optimal_alpha(self):
        """
        Searches through all .pkl files and returns the name of the file with the highest top 1 accuracy.
        :return:
        """
        performance_files = glob.glob(self.home_dir + '/*.pkl')
        performance_files2 = glob.glob(self.home_dir + '/saved_models/pairwise_saved_models/*alpha.pkl')
        print(performance_files2)
        performance_files = performance_files + performance_files2
        max_val = 0
        max_filename = None
        alpha = []
        vals = []
        for i, file in enumerate(performance_files):
            data: dict = pickle.load(open(file, 'rb'))
            print(file, ': ', data['top1_mean'])
            if data['top1_mean'] > max_val:
                max_val = data['top1_mean']
                max_filename = file
            if 'alpha' in data.keys():
                alpha.append(data['alpha'])
                vals.append(data['top1_mean'])
        plt.scatter(alpha, vals)
        plt.show()
        print('The optimal alpha is: ', max_filename, 'with an accuracy of: ', max_val)
        print('The exact value of alpha can be found by dividing the digits before the .tar by 10')
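# Every pairwise evaluate() above ranks the 20 candidate tasks the same way: fill a
# 20x20 matrix with P(task m preferred over task n), leave the diagonal at zero, and
# rank tasks by their row sums. A self-contained sketch of just that aggregation step
# (the function name is illustrative):
def rank_tasks_by_pairwise_preference(probability_matrix):
    """probability_matrix[m][n] = P(task m preferred over task n); diagonal is 0."""
    column_vec = np.sum(probability_matrix, axis=1)  # total preference mass per task
    choice = int(np.argmax(column_vec))              # top-1 task
    _, top_three = torch.topk(torch.Tensor(column_vec), 3)  # indices of the top-3 tasks
    return choice, top_three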
class BDTTrain:
    """
    Class structure to train the BDT with a certain alpha.
    This class handles training the BDT, evaluating the BDT, and saving.
    """

    def __init__(self, num_schedules):
        self.arguments = Logger()
        self.alpha = .9
        self.num_schedules = num_schedules
        self.home_dir = self.arguments.home_dir
        self.total_loss_array = []

        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
            self.num_schedules) + '_inf_hetero_deadline_naive.pkl'

        self.data = pickle.load(open(load_directory, "rb"))
        self.X, self.Y, self.schedule_array = create_new_dataset(self.data, num_schedules=self.num_schedules)
        for i, each_element in enumerate(self.X):
            self.X[i] = each_element + list(range(20))

        self.model = ProLoNet(input_dim=len(self.X[0]), weights=None, comparators=None, leaves=256,
                              output_dim=20, bayesian_embedding_dim=None, alpha=1.5,
                              use_gpu=True, vectorized=False, is_value=False)

        use_gpu = True
        if use_gpu:
            self.model = self.model.cuda()
        print(self.model.state_dict())
        params = list(self.model.parameters())
        self.opt = torch.optim.RMSprop([{'params': params}])
        self.num_iterations_predict_task = 0
        self.total_iterations = 0
        self.convergence_epsilon = .01
        self.when_to_save = 1000
        self.distribution_epsilon = .0001

    def train(self):
        """
        Trains BDT.
        Randomly samples a schedule and timestep within that schedule, and passes in
        the corresponding data in an attempt to classify which task was scheduled.
        :return:
        """
        loss_func = AlphaLoss()
        training_done = False

        while not training_done:
            # sample a random timestep (this variant trains on the full set; the
            # held-out test pickle is loaded separately in evaluate_on_test_set)
            rand_timestep_within_sched = np.random.randint(len(self.X))
            input_nn = self.X[rand_timestep_within_sched]
            truth_nn = self.Y[rand_timestep_within_sched]

            if torch.cuda.is_available():
                input_nn = Variable(torch.Tensor(np.asarray(input_nn).reshape(1, 242)).cuda())  # change to 5 to increase batch size
                P = Variable(torch.Tensor(np.ones((1, 20)))).cuda()
                P *= self.distribution_epsilon
                P[0][truth_nn] = 1 - 19 * self.distribution_epsilon
                truth = Variable(torch.Tensor(np.asarray(truth_nn).reshape(1)).cuda().long())
            else:
                input_nn = Variable(torch.Tensor(np.asarray(input_nn).reshape(1, 242)))
                P = Variable(torch.Tensor(np.ones((1, 20)) * self.distribution_epsilon))
                P[0][truth_nn] = 1 - 19 * self.distribution_epsilon
                truth = Variable(torch.Tensor(np.asarray(truth_nn).reshape(1)).long())

            self.opt.zero_grad()
            output = self.model.forward(input_nn)
            loss = loss_func.forward(P, output, self.alpha)
            # skip updates for degenerate losses (vanishing or exploding)
            if loss.item() < .05 or loss.item() > 5:
                pass
            else:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                self.opt.step()

            self.total_loss_array.append(loss.item())
            total_iterations = len(self.total_loss_array)
            if total_iterations % 50 == 49:
                print('loss at', total_iterations, 'is', loss.item())
            if total_iterations > 15000:
                training_done = True

    def evaluate_on_test_set(self, load_in_model=False):
        """
        Evaluate performance of a trained network.
        This is tested on 20% of the data and will be stored in a text file.
        :return:
        """
        num_schedules = 75
        # load in new data
        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/test/' + str(
            num_schedules) + '_inf_hetero_deadline_naive.pkl'

        data = pickle.load(open(load_directory, "rb"))
        X, Y, schedule_array = create_new_dataset(data, num_schedules)
        for i, each_element in enumerate(X):
            X[i] = each_element + list(range(20))

        prediction_accuracy = [0, 0]
        percentage_accuracy_top1 = []
        percentage_accuracy_top3 = []

        for i, schedule in enumerate(schedule_array):
            for count in range(schedule[0], schedule[1] + 1):
                net_input = X[count]
                truth = Y[count]

                if torch.cuda.is_available():
                    input_nn = Variable(torch.Tensor(np.asarray(net_input).reshape(1, 242)).cuda())
                    truth = Variable(torch.Tensor(np.asarray(truth).reshape(1)).cuda().long())
                else:
                    input_nn = Variable(torch.Tensor(np.asarray(net_input).reshape(1, 242)))
                    truth = Variable(torch.Tensor(np.asarray(truth).reshape(1)))

                ##### forward #####
                output = self.model.forward(input_nn)
                index = torch.argmax(output).item()
                # top 3
                _, top_three = torch.topk(output, 3)

                if index == truth.item():
                    prediction_accuracy[0] += 1
                if truth.item() in top_three.detach().cpu().tolist()[0]:
                    prediction_accuracy[1] += 1

            print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20, ' top3: ', prediction_accuracy[1] / 20)
            print('schedule num:', i)
            percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
            percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
            prediction_accuracy = [0, 0]

        self.save_performance_results(percentage_accuracy_top1, percentage_accuracy_top3,
                                      'inf_DDT_small_' + str(self.num_schedules))

    def save_performance_results(self, top1, top3, special_string):
        """
        Saves performance of top1 and top3.
        :return:
        """
        print('top1_mean for ', self.alpha, ' is : ', np.mean(top1))
        data = {'top1_mean': np.mean(top1),
                'top3_mean': np.mean(top3),
                'top1_stderr': np.std(top1) / np.sqrt(len(top1)),
                'top3_stderr': np.std(top3) / np.sqrt(len(top3))}
        save_pickle(file=data,
                    file_location=self.home_dir + '/saved_models/naive_saved_models/',
                    special_string=special_string)
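# Minimal usage sketch (assumed driver, not in the original): train the naive 20-way
# DDT above on the training pickle and score it on the held-out 75-schedule test set.
def run_naive_ddt(num_schedules=150):  # schedule count is illustrative
    trainer = BDTTrain(num_schedules)
    trainer.train()
    trainer.evaluate_on_test_set()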
class BDT_Train(): def __init__(self): self.arguments = Logger() self.num_schedules = 150 self.home_dir = self.arguments.home_dir self.total_loss_array = [] load_directory = '/home/ghost/PycharmProjects/scheduling_environment/new_data_pickle/' + str( self.num_schedules) + 'pairwise.pkl' self.X = None self.Y = None self.schedule_array = None bayesian_embedding_dim = 4 self.data = pickle.load(open(load_directory, "rb")) self.create_new_data() self.start_of_each_set_twenty = self.create_sets_of_20_from_X_for_pairwise_comparisions( ) self.embedding_list = [ torch.ones(bayesian_embedding_dim) * 1 / 3 for i in range(self.num_schedules) ] device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") self.model = ProLoNet(input_dim=len(self.X[0]), weights=None, comparators=None, leaves=64, output_dim=1, bayesian_embedding_dim=bayesian_embedding_dim, alpha=0.5, use_gpu=True, vectorized=False, is_value=True).cuda() use_gpu = True if use_gpu: self.model = self.model.cuda() print(self.model.state_dict()) params = list(self.model.parameters()) del params[0] self.opt = torch.optim.RMSprop([{ 'params': params }, { 'params': self.model.bayesian_embedding, 'lr': .001 }]) self.num_iterations_predict_task = 0 self.total_iterations = 0 self.covergence_epsilon = .01 self.when_to_save = 150 self.criterion = torch.nn.BCELoss() def create_new_data(self): self.X = [] self.Y = [] self.schedule_array = [] for i in range(0, self.num_schedules): timesteps_where_events_are_scheduled = self.find_nums_with_task_scheduled_pkl( i) # should be 20 sets of 20 if i == 0: start = 0 else: start = self.schedule_array[-1][1] + 1 end = start + len(timesteps_where_events_are_scheduled) - 1 self.schedule_array.append([start, end]) # each block is of size 400 for each_timestep in timesteps_where_events_are_scheduled: input_nn, output = self.rebuild_input_output_from_pickle( i, each_timestep) self.X.append(input_nn) self.Y.append(output) def find_nums_with_task_scheduled_pkl(self, rand_schedule): nums = [] for i, timestep in enumerate(self.data[rand_schedule]): if ast.literal_eval(self.data[rand_schedule][i][18]) != -1: nums.append(i) else: continue return nums def rebuild_input_output_from_pickle(self, rand_schedule, rand_timestep): schedule_timestep_data = self.data[rand_schedule][rand_timestep] state_input = [] for i, element in enumerate(schedule_timestep_data): # if i == 0: # if type(ast.literal_eval(element)) == float: # state_input.append(ast.literal_eval(element)) # elif type(ast.literal_eval(element)) == int: # state_input.append(ast.literal_eval(element)) if i == 17: continue elif 18 > i > 4: if type(ast.literal_eval(element)) == float: state_input.append(ast.literal_eval(element)) elif type(ast.literal_eval(element)) == int: state_input.append(ast.literal_eval(element)) elif type(ast.literal_eval(element)) == list: state_input = state_input + ast.literal_eval(element) else: continue output = ast.literal_eval(schedule_timestep_data[18]) return state_input, output def create_sets_of_20_from_X_for_pairwise_comparisions(self): length_of_X = len(self.X) return list(range(0, length_of_X, 20)) def find_which_schedule_this_belongs_to(self, sample_val): """ Takes a sample and determines with schedule this belongs to. Note: A schedule is task * task sized :param sample_val: an int :return: schedule num """ for i, each_array in enumerate(self.schedule_array): if sample_val >= each_array[0] and sample_val <= each_array[1]: return i else: continue def train(self): """ Trains BDT. 
Randomly samples a schedule and timestep within that schedule, produces training data using x_i - x_j and trains upon that. :return: """ # loss = nn.CrossEntropyLoss() training_done = False cv_cutoff = .8 * len(self.start_of_each_set_twenty) loss_func = Alpha_Loss() running_loss_predict_tasks = 0 num_iterations_predict_task = 0 while not training_done: rand_timestep_within_sched = np.random.randint(cv_cutoff) set_of_twenty = self.start_of_each_set_twenty[ rand_timestep_within_sched] truth = self.Y[set_of_twenty] which_schedule = self.find_which_schedule_this_belongs_to( set_of_twenty) load_in_embedding(self.model, self.embedding_list, which_schedule) phi_i_num = truth + set_of_twenty # old method: set_of_twenty[0] + truth phi_i = self.X[phi_i_num] phi_i_numpy = np.asarray(phi_i) # variables to keep track of loss and number of tasks trained over # iterate over pairwise comparisons for counter in range(set_of_twenty, set_of_twenty + 20): if counter == phi_i_num: # if counter == phi_i_num: continue else: phi_j = self.X[counter] phi_j_numpy = np.asarray(phi_j) feature_input = phi_i_numpy - phi_j_numpy label = torch.ones((1, 1)) # label = add_noise_pairwise(label, self.noise_percentage) if torch.cuda.is_available(): feature_input = Variable( torch.Tensor(feature_input.reshape(1, 12)).cuda()) label = Variable(torch.Tensor(label).cuda()) else: feature_input = Variable( torch.Tensor(feature_input.reshape(1, 12))) label = Variable(torch.Tensor(label.reshape(1, 1))) output = self.model(feature_input) if output > 1: output = torch.floor(output) self.opt.zero_grad() loss = self.criterion(output, label) print('loss is :', loss) loss.backward() torch.nn.utils.clip_grad_norm_( self.model.parameters(), 0.5) # clip any very high gradients self.opt.step() running_loss_predict_tasks += loss.item() num_iterations_predict_task += 1 for counter in range(set_of_twenty, set_of_twenty + 20): if counter == phi_i_num: continue else: phi_j = self.X[counter] phi_j_numpy = np.asarray(phi_j) feature_input = phi_j_numpy - phi_i_numpy label = torch.zeros((1, 1)) if torch.cuda.is_available(): feature_input = Variable( torch.Tensor(feature_input.reshape(1, 12)).cuda()) label = Variable(torch.Tensor(label).cuda()) label = label.reshape((1, 1)) else: feature_input = Variable( torch.Tensor(feature_input.reshape(1, 12))) label = Variable(torch.Tensor(label.reshape(1, 1))) output = self.model(feature_input) loss = self.criterion(output, label) print('loss is :', loss) # clip any very high gradients # prepare optimizer, compute gradient, update params self.opt.zero_grad() loss.backward() torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5) self.opt.step() running_loss_predict_tasks += loss.item() num_iterations_predict_task += 1 # add average loss to array print(list(self.model.parameters())) store_embedding_back(self.model, self.embedding_list, which_schedule) self.total_loss_array.append(running_loss_predict_tasks / num_iterations_predict_task) num_iterations_predict_task = 0 running_loss_predict_tasks = 0 self.total_iterations += 1 print('total iterations is', self.total_iterations) if self.total_iterations > 25: print('total loss (average for each 40, averaged)', np.mean(self.total_loss_array[-20:])) if self.total_iterations > 0 and self.total_iterations % self.when_to_save == self.when_to_save - 1: # self.plot_nn() print(self.embedding_list) self.save_trained_nets() if self.total_iterations > 300000 and np.mean( self.total_loss_array[-100:]) - np.mean( self.total_loss_array[-500:]) < self.covergence_epsilon: 
                training_done = True

    def evaluate(self):
        pass

    def save_trained_nets(self):
        """
        Saves the model.
        Note: this path appears to point at a directory; torch.save needs a
        file name appended to it.
        """
        torch.save({'nn_state_dict': self.model.state_dict(),
                    'parameters': self.arguments},
                   '/home/ghost/PycharmProjects/bayesian_prolo/')
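
# Illustrative sketch (not part of the original pipeline): BDT_Train labels
# phi_i - phi_j with 1 ("i preferred over j") and phi_j - phi_i with 0, so the
# trained model scores ordered pairs of tasks. The helper below is a
# hypothetical example of how such a pairwise scorer could pick a task out of
# a set of 20 at test time: sum each task's scores against all others and
# take the argmax.
def rank_tasks_pairwise(model, task_features):
    """
    Hypothetical helper: task_features is a (20, d) numpy array of feature
    vectors for one timestep. Assumes the model maps a (1, d) difference
    vector to a scalar preference in [0, 1].
    :return: index of the task the model prefers overall
    """
    num_tasks = len(task_features)
    scores = np.zeros(num_tasks)
    with torch.no_grad():
        for i in range(num_tasks):
            for j in range(num_tasks):
                if i == j:
                    continue
                diff = torch.Tensor((task_features[i] - task_features[j]).reshape(1, -1))
                if torch.cuda.is_available():
                    diff = diff.cuda()
                scores[i] += model(diff).item()
    return int(np.argmax(scores))
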
class ProLoTrain:
    """
    class structure to train the BDT with a certain alpha.
    This class handles training the BDT, evaluating the BDT, and saving.
    """

    def __init__(self, num_schedules):
        self.arguments = Logger()
        self.alpha = .9
        self.num_schedules = num_schedules
        self.home_dir = self.arguments.home_dir
        self.total_loss_array = []

        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/' + str(
            self.num_schedules) + '_inf_hetero_deadline_naive.pkl'

        self.data = pickle.load(open(load_directory, "rb"))
        self.X, self.Y, self.schedule_array = create_new_dataset(num_schedules=self.num_schedules, data=self.data)
        # append the 20 task indices to every state vector
        for i, each_element in enumerate(self.X):
            self.X[i] = each_element + list(range(20))

        self.model = ProLoNet(input_dim=len(self.X[0]),
                              weights=None,
                              comparators=None,
                              leaves=16,
                              output_dim=20,
                              bayesian_embedding_dim=8,
                              alpha=1.5,
                              use_gpu=True,
                              vectorized=True,
                              is_value=False)

        use_gpu = True
        if use_gpu:
            self.model = self.model.cuda()
        print(self.model.state_dict())

        params = list(self.model.parameters())
        del params[0]  # the Bayesian embedding is optimized separately with its own learning rate
        self.opt = torch.optim.RMSprop(
            [{'params': params}, {'params': self.model.bayesian_embedding, 'lr': .001}])

        self.num_iterations_predict_task = 0
        self.total_iterations = 0
        self.convergence_epsilon = .01
        self.when_to_save = 1000
        self.distribution_epsilon = .0001
        self.max_depth = 10  # TODO: add back in deepening
        self.embedding_list = [torch.ones(8) * 1 / 3 for _ in range(self.num_schedules)]
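    # Illustrative note: the training and evaluation loops below build their
    # AlphaLoss target P as a smoothed one-hot distribution over the 20
    # tasks, with probability 1 - 19 * distribution_epsilon on the true task
    # and distribution_epsilon on each of the other 19. A hypothetical
    # stand-alone version of that construction:
    #
    #     def smoothed_target(truth_idx, eps=.0001, num_classes=20):
    #         p = torch.ones((1, num_classes)) * eps
    #         p[0][truth_idx] = 1 - (num_classes - 1) * eps
    #         return p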
    def train(self):
        """
        Trains the PDDT.
        Randomly samples a timestep, trains on the naive 20-way classification
        target with AlphaLoss, and periodically deepens the tree.
        :return:
        """
        threshold = .05
        training_done = False
        loss_func = AlphaLoss()

        # deepening data
        deepen_data = {'samples': [], 'labels': [], 'embedding_indices': []}

        while not training_done:
            # sample a timestep before the cutoff for cross_validation
            rand_timestep_within_sched = np.random.randint(len(self.X))
            input_nn = self.X[rand_timestep_within_sched]
            truth_nn = self.Y[rand_timestep_within_sched]

            which_schedule = find_which_schedule_this_belongs_to(self.schedule_array, rand_timestep_within_sched)
            load_in_embedding(self.model, self.embedding_list, which_schedule)
            deepen_data['samples'].append(np.array(input_nn))

            if torch.cuda.is_available():
                input_nn = Variable(torch.Tensor(np.asarray(input_nn).reshape(1, 242)).cuda())  # change to 5 to increase batch size
                P = Variable(torch.Tensor(np.ones((1, 20)))).cuda()
                P *= self.distribution_epsilon
                P[0][truth_nn] = 1 - 19 * self.distribution_epsilon
                truth = Variable(torch.Tensor(np.asarray(truth_nn).reshape(1)).cuda().long())
            else:
                input_nn = Variable(torch.Tensor(np.asarray(input_nn).reshape(1, 242)))
                P = Variable(torch.Tensor(np.ones((1, 20)) * self.distribution_epsilon))
                P[0][truth_nn] = 1 - 19 * self.distribution_epsilon
                truth = Variable(torch.Tensor(np.asarray(truth_nn).reshape(1)).long())

            self.opt.zero_grad()
            output = self.model.forward(input_nn)
            loss = loss_func.forward(P, output, self.alpha)
            if loss.item() < .001 or loss.item() > 30:
                # skip degenerate updates from near-zero or exploding losses
                pass
            else:
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                self.opt.step()

            deepen_data['labels'].extend([truth.item()])
            deepen_data['embedding_indices'].extend([which_schedule])

            # add loss to array and store the updated embedding back
            # print(list(self.model.parameters()))
            self.embedding_list = store_embedding_back(self.model, self.embedding_list, which_schedule)
            self.total_loss_array.append(loss.item())

            self.total_iterations += 1

            if self.total_iterations > 25 and self.total_iterations % 50 == 1:
                print('total iterations is', self.total_iterations)
                print('total loss (averaged over last 40)', np.mean(self.total_loss_array[-40:]))

            if self.total_iterations > 0 and self.total_iterations % self.when_to_save == self.when_to_save - 1:
                self.save_trained_nets('BDDT' + str(self.num_schedules))
                threshold -= .1

            if self.total_iterations % 500 == 499:
                self.model = deepen_with_embeddings(self.model,
                                                    deepen_data,
                                                    self.embedding_list,
                                                    max_depth=self.max_depth,
                                                    threshold=threshold / len(self.model.leaf_init_information))
                params = list(self.model.parameters())
                del params[0]
                self.opt = torch.optim.RMSprop(
                    [{'params': params}, {'params': self.model.bayesian_embedding, 'lr': .001}])
                deepen_data = {'samples': [], 'labels': [], 'embedding_indices': []}

            if self.total_iterations > 5000 and np.mean(self.total_loss_array[-100:]) - np.mean(
                    self.total_loss_array[-500:]) < self.convergence_epsilon:
                training_done = True
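    # Note on the deepening step above: deepen_with_embeddings returns a new,
    # deeper model, so the optimizer must be rebuilt afterwards -- the old
    # RMSprop instance still references the previous model's parameter
    # tensors. The embedding parameter is again given its own learning rate.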
    def evaluate_on_test_data(self, load_in_model=False):
        """
        Evaluates the performance of a trained network tuned with the
        alpha-divergence loss. Only the Bayesian embedding is updated on the
        test schedules; note this function is called after training
        convergence.
        :return:
        """
        num_schedules = 75
        loss_func = AlphaLoss()
        load_directory = '/home/ghost/PycharmProjects/bayesian_prolo/scheduling_env/datasets/test/' + str(
            num_schedules) + '_inf_hetero_deadline_naive.pkl'

        data = pickle.load(open(load_directory, "rb"))
        X, Y, schedule_array = create_new_dataset(num_schedules=num_schedules, data=data)
        for i, each_element in enumerate(X):
            X[i] = each_element + list(range(20))

        # define a new optimizer that only updates the embedding
        embedding_optimizer = torch.optim.RMSprop([{'params': self.model.bayesian_embedding, 'lr': .001}])
        embedding_list = [torch.ones(8) * 1 / 3 for _ in range(num_schedules)]

        prediction_accuracy = [0, 0]
        percentage_accuracy_top1 = []
        percentage_accuracy_top3 = []

        if load_in_model:
            self.model.load_state_dict(torch.load(
                '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/pairwise_saved_models/model_homog.tar'
            )['nn_state_dict'])

        for i, schedule in enumerate(schedule_array):
            load_in_embedding(self.model, embedding_list, i)
            for count in range(schedule[0], schedule[1] + 1):
                net_input = X[count]
                truth = Y[count]

                if torch.cuda.is_available():
                    input_nn = Variable(torch.Tensor(np.asarray(net_input).reshape(1, 242)).cuda())
                    P = Variable(torch.Tensor(np.ones((1, 20)))).cuda()
                    P *= self.distribution_epsilon
                    P[0][truth] = 1 - 19 * self.distribution_epsilon
                    truth = Variable(torch.Tensor(np.asarray(truth).reshape(1)).cuda().long())
                else:
                    input_nn = Variable(torch.Tensor(np.asarray(net_input).reshape(1, 242)))
                    P = Variable(torch.Tensor(np.ones((1, 20)) * self.distribution_epsilon))
                    P[0][truth] = 1 - 19 * self.distribution_epsilon
                    truth = Variable(torch.Tensor(np.asarray(truth).reshape(1)).long())

                ##### forward #####
                output = self.model.forward(input_nn)

                # adapt only the embedding on test data
                embedding_optimizer.zero_grad()
                loss = loss_func.forward(P, output, self.alpha)
                if loss.item() < .001 or loss.item() > 30:
                    pass
                else:
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 0.5)
                    embedding_optimizer.step()

                index = torch.argmax(output).item()
                # top 3
                _, top_three = torch.topk(output, 3)

                if index == truth.item():
                    prediction_accuracy[0] += 1
                if truth.item() in top_three.detach().cpu().tolist()[0]:
                    prediction_accuracy[1] += 1

            # store the adapted embedding back
            embedding_list = store_embedding_back(self.model, embedding_list, i)

            # schedule finished: each schedule contributes 20 test timesteps
            print('Prediction Accuracy: top1: ', prediction_accuracy[0] / 20, ' top3: ', prediction_accuracy[1] / 20)
            print('schedule num:', i)
            percentage_accuracy_top1.append(prediction_accuracy[0] / 20)
            percentage_accuracy_top3.append(prediction_accuracy[1] / 20)
            prediction_accuracy = [0, 0]

        self.save_performance_results(percentage_accuracy_top1, percentage_accuracy_top3,
                                      'inf_BDT' + str(self.num_schedules))

    def save_trained_nets(self, name):
        """
        Saves the model.
        :return:
        """
        torch.save({'nn_state_dict': self.model.state_dict(),
                    'parameters': self.arguments},
                   '/home/ghost/PycharmProjects/bayesian_prolo/saved_models/naive_saved_models/BNN_' + name + '.tar')

    def save_performance_results(self, top1, top3, special_string):
        """
        Saves the top-1 and top-3 performance, with standard errors.
        :return:
        """
        print('top1_mean for ', self.alpha, ' is : ', np.mean(top1))
        data = {'top1_mean': np.mean(top1),
                'top3_mean': np.mean(top3),
                'top1_stderr': np.std(top1) / np.sqrt(len(top1)),
                'top3_stderr': np.std(top3) / np.sqrt(len(top3))}
        save_pickle(file=data,
                    file_location=self.home_dir + '/saved_models/naive_saved_models/',
                    special_string=special_string)
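
# Usage sketch (assumed entry point; no main guard appears in the excerpted
# source, and the num_schedules value below is hypothetical):
def main():
    trainer = ProLoTrain(num_schedules=150)
    trainer.train()
    trainer.evaluate_on_test_data()


if __name__ == '__main__':
    main()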