# (Assumes module-level imports earlier in the file: os, pickle, random, time,
# numpy as np, torch, matplotlib.pyplot as plt, clearml.Task, and the project's
# NLLFunction / linearvalueiteration / tensorboard_writer.)

# Solve with learned reward functions
y_mc_relu_v, y_mc_relu_q, y_mc_relu_logp, y_mc_relu_P = linearvalueiteration(
    mdp_data, y_mc_relu_reward)

'''
# Print results
print("\nTrue R has:\n - negated likelihood: {}\n - EVD: {}".format(
    trueNLL, irl_model.NLL.calculate_EVD(truep, r)))
print("\nPred R with ReLU activation has:\n - negated likelihood: {}\n - EVD: {}".format(
    irl_model.NLL.apply(y_mc_relu_reward, initD, mu_sa, muE,
                        feature_data['splittable'], mdp_data),
    irl_model.NLL.calculate_EVD(truep, y_mc_relu_reward)))
'''

# Initialise loss function
NLL = NLLFunction()
# Assign loss function constants
NLL.F = feature_data['splittable']
NLL.muE = muE
NLL.mu_sa = mu_sa
NLL.initD = initD
NLL.mdp_data = mdp_data

# Save results
print('\n... saving results ...\n')

# Create path for trained models
RESULTS_PATH = "./noisey_paths/results/dropout/"
for path in [RESULTS_PATH]:
    try:
        os.makedirs(path)
    except FileExistsError:
        pass

NP_results = [
    y_mc_relu, y_mc_std_relu, y_mc_relu_reward, y_mc_relu_v, y_mc_relu_P,
trueNLL = NNIRL_param_list[14]
normalise = NNIRL_param_list[15]
user_input = NNIRL_param_list[16]
worldtype = NNIRL_param_list[17]

# Seed all RNGs for reproducibility
torch.manual_seed(mdp_params['seed'])
np.random.seed(seed=mdp_params['seed'])
random.seed(mdp_params['seed'])

# Initialise tester loss function
testNLL = NLLFunction()
# Assign tester loss function constants
testNLL.F = feature_data['splittable']
testNLL.muE = muE
testNLL.mu_sa = mu_sa
testNLL.initD = initD
testNLL.mdp_data = mdp_data

# Print which benchmark is being trained on
if user_input:
    if worldtype == "gridworld" or worldtype == "gw" or worldtype == "grid":
        print('\n... training on GridWorld benchmark ... \n')
    elif worldtype == "objectworld" or worldtype == "ow" or worldtype == "obj":
        print('\n... training on ObjectWorld benchmark ... \n')
    else:
        print('\n... training on GridWorld benchmark ... \n')

# Print true R loss
print('\n... true reward loss is', trueNLL.item(), '... \n')

# Connect configuration dict
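# --- Illustrative sketch (an assumption, not project code) -----------------
# NLLFunction is used throughout as a loss with a hand-written gradient: it
# is called via NLL.apply(...) and then backpropagated with loss.backward().
# That is the torch.autograd.Function pattern, sketched below with a
# stand-in objective. The project's real NLLFunction also carries the
# constants assigned above (F, muE, mu_sa, initD, mdp_data) and exposes
# helpers such as calculate_EVD.
class _SketchLoss(torch.autograd.Function):
    @staticmethod
    def forward(ctx, r, target):
        ctx.save_for_backward(r, target)
        return ((r - target) ** 2).sum()  # stand-in objective, not the IRL NLL

    @staticmethod
    def backward(ctx, grad_output):
        r, target = ctx.saved_tensors
        # Return one gradient per forward input; None for inputs needing no grad
        return grad_output * 2.0 * (r - target), None
# ---------------------------------------------------------------------------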
def run_single_NN():
    task = Task.init(project_name='MSci-Project',
                     task_name='Gridworld, n=32, b=4, normal')  # init task on ClearML

    # Load variables from file
    open_file = open("NNIRL_param_list.pkl", "rb")
    NNIRL_param_list = pickle.load(open_file)
    open_file.close()
    threshold = NNIRL_param_list[0]
    optim_type = NNIRL_param_list[1]
    net = NNIRL_param_list[2]
    X = NNIRL_param_list[3]
    initD = NNIRL_param_list[4]
    mu_sa = NNIRL_param_list[5]
    muE = NNIRL_param_list[6]
    F = NNIRL_param_list[7]
    # F = F.type(torch.DoubleTensor)
    mdp_data = NNIRL_param_list[8]
    configuration_dict = NNIRL_param_list[9]
    truep = NNIRL_param_list[10]
    NLL_EVD_plots = NNIRL_param_list[11]
    example_samples = NNIRL_param_list[12]
    noisey_features = NNIRL_param_list[13]

    NLL = NLLFunction()  # initialise NLL
    # Assign loss function constants
    NLL.F = F
    NLL.muE = muE
    NLL.mu_sa = mu_sa
    NLL.initD = initD
    NLL.mdp_data = mdp_data

    configuration_dict = task.connect(
        configuration_dict)  # enable configuration override by ClearML
    start_time = time.time()  # to time execution
    # tester = testers()  # to use testing functions

    # Lists for plotting
    NLList = []
    iterations = []
    evdList = []

    i = 0  # track iterations
    finalOutput = None  # store final estimated R
    loss = 1000  # initial loss
    diff = 1000  # initial loss difference
    evd = 10  # initial EVD

    # noisey_features = True
    if noisey_features:
        # Add noise to features at states 12, 34 and 64 (when mdp_params.n = 8)
        # by setting each of those states' features to all zeros
        print('\n... adding noise to features at states 12, 34 and 64 ...\n')
        X[11, :] = torch.zeros(X.size()[1])
        X[33, :] = torch.zeros(X.size()[1])
        X[63, :] = torch.zeros(X.size()[1])

    # if noisey_paths:
    #     print('\n... adding noise to paths at states 12, 34 and 64 ...\n')

    if optim_type == 'Adam':
        print('\nOptimising with torch.Adam\n')
        optimizer = torch.optim.Adam(
            net.parameters(), lr=configuration_dict.get('base_lr'),
            weight_decay=1e-2)  # weight decay for L2 regularisation

        # Alternative termination criteria:
        # while evd > threshold:  # EVD threshold
        # for p in range(configuration_dict.get('number_of_epochs')):  # fixed iteration count
        # for p in range(1):  # for testing
        while diff >= threshold:  # termination criterion: loss difference
            prevLoss = loss
            # net.zero_grad()
            net.zero_grad(set_to_none=True)

            # Predict a reward for each state by passing its feature column
            # through the network
            output = torch.empty(len(X[0]), 1, dtype=torch.double)
            indexer = 0
            for j in range(len(X[0])):
                thisR = net(X[:, j].view(-1, len(X[:, j])))
                output[indexer] = thisR
                indexer += 1
            finalOutput = output

            loss = NLL.apply(output, initD, mu_sa, muE, F, mdp_data)  # use this line for custom gradient
            # loss = likelihood(output, initD, mu_sa, muE, F, mdp_data)  # use this line for auto gradient
            # tester.checkgradients_NN(output, NLL)  # check gradients
            loss.backward()  # propagate grad through network
            # nn.utils.clip_grad_norm_(net.parameters(), max_norm=2.0, norm_type=2)
            evd = NLL.calculate_EVD(truep, torch.matmul(X, output))  # calc EVD
            optimizer.step()

            # Print line that shows est R:
            # print('{}: output:\n {} | EVD: {} | loss: {} '.format(
            #     i, torch.matmul(X, output).repeat(1, 5).detach(), evd, loss.detach()))
            # Print line that hides est R:
            print('{}: | EVD: {} | loss: {} | diff {}'.format(i, evd, loss, diff))

            # Store metrics for plotting
            NLList.append(loss)
            iterations.append(i)
            evdList.append(evd)
            tensorboard_writer.add_scalar('loss', loss, i)
            tensorboard_writer.add_scalar('evd', evd, i)
            tensorboard_writer.add_scalar('diff', diff, i)
            i += 1
            diff = abs(prevLoss - loss)
    else:
        print('\nImplement LBFGS\n')  # LBFGS optimiser not implemented yet

    PATH = './NN_IRL.pth'
    torch.save(net.state_dict(), PATH)
    tensorboard_writer.close()

    if NLL_EVD_plots:
        # Plot NLL and EVD against iterations
        f, (ax1, ax2) = plt.subplots(1, 2, sharex=True)
        ax1.plot(iterations, NLList)
        ax1.plot(iterations, NLList, 'r+')
        ax1.set_title('NLL')
        ax2.plot(iterations, evdList)
        ax2.plot(iterations, evdList, 'r+')
        ax2.set_title('Expected Value Diff')
        # plt.show()

    print("\nruntime: --- %s seconds ---\n" % (time.time() - start_time))
    return net, finalOutput, (time.time() - start_time)
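
# Hypothetical entry point (an assumption for illustration: it presumes
# "NNIRL_param_list.pkl" was already produced by the experiment-setup script
# and that tensorboard_writer is initialised at module level):
if __name__ == '__main__':
    trained_net, est_reward, runtime = run_single_NN()
    print('finished in {:.1f} seconds'.format(runtime))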