def compute_loss(env, agent, theta_estimation, true_theta, phi,
                 trajectory_data=None, num_episodes=100, is1d=False):
    # Roll out fresh episodes unless cached trajectory data is supplied.
    if trajectory_data is None:
        states, actions, tasks = trajectory(agent, torch.Tensor(phi),
                                            torch.Tensor(true_theta), env,
                                            num_episodes, is1d=is1d)
    else:
        # assumes the cached dict also carries 'states', which getLoss needs below
        states = trajectory_data['states']
        actions = trajectory_data['actions']
        tasks = trajectory_data['tasks']
    theta_estimation = torch.nn.Parameter(torch.Tensor(theta_estimation))
    loss = getLoss(agent, actions, tasks, torch.Tensor(phi), theta_estimation,
                   env, states=states)
    return loss
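# Usage sketch (hypothetical, not from this file): precompute rollouts once and
# reuse them across repeated loss evaluations via trajectory_data. Assumes the
# dict carries the same 'states'/'actions'/'tasks' keys the cached path reads.
states, actions, tasks = trajectory(agent, torch.Tensor(phi),
                                    torch.Tensor(true_theta), env, 100, is1d=True)
cached = {'states': states, 'actions': actions, 'tasks': tasks}
loss = compute_loss(env, agent, theta_estimation, true_theta, phi,
                    trajectory_data=cached, is1d=True)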
env = Model(arg)  # build an environment
agent = Agent(env.state_dim, env.action_dim, arg, filename,
              hidden_dim=128, gamma=DISCOUNT_FACTOR, tau=0.001, device="cpu")
agent.load(filename)

true_theta_log = []
final_theta_log = []
stderr_log = []
result_log = []

for num_thetas in range(10):
    # sample a ground-truth theta, roll out trajectories under it, and fit from scratch
    true_theta = reset_theta(arg.gains_range, arg.std_range, arg.goal_radius_range)
    true_theta_log.append(true_theta.data.clone())
    x_traj, obs_traj, a_traj, _ = trajectory(agent, true_theta, arg.INVERSE_BATCH_SIZE,
                                             env, arg, arg.gains_range, arg.std_range,
                                             arg.goal_radius_range)  # generate true trajectory
    true_loss = getLoss(agent, x_traj, obs_traj, a_traj, true_theta, env,
                        arg.gains_range, arg.std_range)  # this is the lower bound of the loss?

    #theta = nn.Parameter(true_theta.data.clone() + 0.5 * true_theta.data.clone())
    theta = nn.Parameter(reset_theta(arg.gains_range, arg.std_range, arg.goal_radius_range))
    ini_theta = theta.data.clone()

    loss_log = deque(maxlen=5000)
    theta_log = deque(maxlen=5000)
    optT = torch.optim.Adam([theta], lr=1e-3)
    prev_loss = 100000
    loss_diff = deque(maxlen=5)
    for num_batches in range(5000):
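        # NOTE: the loop body is truncated in this file. What follows is a
        # hypothetical sketch, not the author's code, mirroring the update
        # pattern used in single_inverse() further below: evaluate the loss,
        # backpropagate, step, and clamp theta back into the trained range.
        loss = getLoss(agent, x_traj, obs_traj, a_traj, theta, env,
                       arg.gains_range, arg.std_range)
        loss_log.append(loss.data)
        optT.zero_grad()
        loss.backward(retain_graph=True)
        optT.step()
        theta = theta_range(theta, arg.gains_range, arg.std_range, arg.goal_radius_range)
        theta_log.append(theta.data.clone())
        # track recent loss changes so a convergence test could use loss_diff
        loss_diff.append(abs(prev_loss - loss.data.item()))
        prev_loss = loss.data.item()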
arg.DELTA_T = 0.2
arg.EPISODE_LEN = 35

# load a saved inverse run and take the latest theta estimate
a = load_inverse_data('17_21_34')
theta_trajectory = a['theta_estimations']
true_theta = a['true_theta']
theta_estimation = theta_trajectory[-1]
phi = np.array(a['phi'])

# no background, faster
env = ffac_1d.FireflyTrue1d_cpu(arg)
baselines_mlp_model = TD3_torch.TD3.load('trained_agent/1d_1000000_9_16_22_20.zip')
agent = baselines_mlp_model.actor
agent.cpu()
agent.requires_grad = False

is1d = True
H_dim = 7
num_episodes = 100
states, actions, tasks = trajectory(agent, torch.Tensor(phi), torch.Tensor(true_theta),
                                    env, num_episodes, is1d=is1d)

theta_estimation = torch.nn.Parameter(torch.Tensor(theta_estimation))
phi = torch.nn.Parameter(torch.Tensor(phi))
phi.requires_grad = False

loss = getLoss(agent, actions, tasks, phi, theta_estimation, env,
               states=states, gpu=False)
grads = torch.autograd.grad(loss, theta_estimation, create_graph=True, allow_unused=True)[0]
print(grads)

# build the Hessian row by row from second derivatives of the loss w.r.t. theta
H = torch.zeros(H_dim, H_dim)
for i in range(H_dim):
    print(i)
    H[i] = torch.autograd.grad(grads[i], theta_estimation,
                               retain_graph=True, allow_unused=True)[0].view(-1)
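# Standard errors from the estimated Hessian: a sketch following the
# commented-out block in single_inverse() below. Assumes H is invertible;
# abs() guards against small negative diagonal entries from sampling noise.
I = H.inverse()
stderr = torch.sqrt(torch.abs(I).diag())         # full inverse-Hessian standard errors
stderr_ii = 1 / torch.sqrt(torch.abs(H.diag()))  # cheaper diagonal-only approximation
print(stderr)
print(stderr_ii)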
def single_inverse(true_theta, arg, env, agent, x_traj, a_traj, true_loss,
                   filename, n, Pro_Noise=None, Obs_Noise=None, part_theta=False):
    tic = time.time()

    # Passing any non-None value pins these noise entries to their true values,
    # so theta_range() can hold them fixed when part_theta is True.
    if Pro_Noise is not None:
        Pro_Noise = true_theta[2:4]
    if Obs_Noise is not None:
        Obs_Noise = true_theta[6:8]

    #rndsgn = torch.sign(torch.randn(1, len(true_theta))).view(-1)
    #purt = torch.Tensor([0.1, 0.1, 1, 1, 0.1, 0.1, 1, 1, 0.1])  # perturbation
    #theta = nn.Parameter(true_theta.data.clone() + purt * torch.randn(1, len(true_theta)).view(-1))
    #theta = theta_range(theta, arg.gains_range, arg.noise_range, arg.goal_radius_range)  # keep inside of trained range; just for checking
    #theta = nn.Parameter(true_theta.data.clone())  # just for checking

    theta = nn.Parameter(reset_theta(arg.gains_range, arg.noise_range, arg.goal_radius_range))
    ini_theta = theta.data.clone()

    loss_log = deque(maxlen=arg.NUM_IT)
    loss_log_recent = deque(maxlen=100)
    #loss_act_log = deque(maxlen=arg.NUM_IT)
    #loss_obs_log = deque(maxlen=arg.NUM_IT)
    theta_log = deque(maxlen=arg.NUM_IT)
    optT = torch.optim.Adam([theta], lr=arg.ADAM_LR)

    for it in tqdm(range(arg.NUM_IT)):
        loss, loss_act, loss_obs = getLoss(agent, x_traj, a_traj, theta, env,
                                           arg.gains_range, arg.noise_range,
                                           arg.PI_STD, arg.NUM_SAMPLES)
        loss_log.append(loss.data)
        loss_log_recent.append(loss.data)
        #loss_act_log.append(loss_act.data)
        #loss_obs_log.append(loss_obs.data)

        optT.zero_grad()                  # clear old gradients from the last step
        loss.backward(retain_graph=True)  # derivative of the loss w.r.t. theta via backpropagation
        optT.step()                       # single optimizer step: this changes theta

        # keep theta inside the trained range, optionally holding the pinned noise entries fixed
        if part_theta:
            theta = theta_range(theta, arg.gains_range, arg.noise_range,
                                arg.goal_radius_range, Pro_Noise, Obs_Noise)
        else:
            theta = theta_range(theta, arg.gains_range, arg.noise_range, arg.goal_radius_range)
        theta_log.append(theta.data.clone())

        if it % 5 == 0:
            #print("num_theta:{}, num:{}, loss:{}".format(n, it, np.round(loss.data.item(), 6)))
            #print("num:{}, theta diff sum:{}".format(it, 1e6 * (true_theta - theta.data.clone()).sum().data))
            print("num_theta:{}, num:{}, loss:{}, true loss:{},\n"
                  " true_theta:{},\n converged_theta:{}\n".format(
                      n, it, np.round(loss.data.item(), 6),
                      np.round(true_loss.data.item(), 6),
                      true_theta.data, theta.data))

        if it % 50 == 0 and it > 0:
            plt.plot(loss_log)
            plt.title("it:{}".format(it))
            plt.savefig('../firefly-inverse-data/data/' + filename + str(n) + '_loss.png')

        """
        # optional early stopping once the running loss beats the true loss
        if it > 1000 and it % 10 == 0:
            if np.mean(loss_log_recent) < true_loss:
                break
        """

    toc = time.time()
    #print((toc - tic) / 60 / 60, "hours")

    """
    # standard errors from the Hessian of the loss at the converged theta
    loss, _, _ = getLoss(agent, x_traj, a_traj, theta, env,
                         arg.gains_range, arg.noise_range, arg.PI_STD, arg.NUM_SAMPLES)
    grads = grad(loss, theta, create_graph=True)[0]
    H = torch.zeros(9, 9)
    for i in range(9):
        H[i] = grad(grads[i], theta, retain_graph=True)[0]
    I = H.inverse()
    stderr = torch.sqrt(torch.abs(I).diag())
    stderr_ii = 1 / torch.sqrt(torch.abs(H.diag()))
    """

    result = {
        'true_theta': true_theta,
        'initial_theta': ini_theta,
        'x_traj': x_traj,
        'a_traj': a_traj,
        'theta': theta,
        'theta_log': theta_log,
        'loss_log': loss_log,
        'true_loss': true_loss,
        'filename': filename,
        'num_theta': n,
        'converging_it': it,
        'duration': toc - tic,
        'arguments': arg,
        #'stderr': stderr,
        #'stderr_ii': stderr_ii,
        #'loss_act_log': loss_act_log,
        #'loss_obs_log': loss_obs_log,
    }
    torch.save(result, '../firefly-inverse-data/data/' + filename + str(n)
               + str(arg.NUM_thetas) + "EP" + str(arg.NUM_EP)
               + str(np.around(arg.PI_STD, decimals=2)) + str(arg.NUM_SAMPLES)
               + "IT" + str(arg.NUM_IT) + str(arg.SEED_NUMBER)
               + '_single_result_part.pkl')
    return result
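# Usage sketch (hypothetical wiring, not from this file): run one inverse fit
# per sampled theta. Passing any non-None Pro_Noise/Obs_Noise pins the noise
# entries of theta to their true values inside single_inverse.
result = single_inverse(true_theta, arg, env, agent, x_traj, a_traj, true_loss,
                        filename, n=0, Pro_Noise=True, Obs_Noise=True,
                        part_theta=True)
print('recovered theta:', result['theta'].data)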