# agent: convert the trained baselines model to a torch model
import time

import policy_torch
import firefly_accac
from stable_baselines import TD3  # assumed: the TF-based stable-baselines, hence the torch conversion below

# arg, number_updates, and single_theta_inverse are assumed to be defined
# by the surrounding training/inverse setup.
baselines_mlp_model = TD3.load('trained_agent/accac_final_1000000_9_11_20_25.zip')
agent = policy_torch.copy_mlp_weights(baselines_mlp_model, layers=[512, 512], n_inputs=32)

# load the environment, same as in training
env = firefly_accac.FireflyAccAc(arg)

# --- set up the env for inverse ---
# TODO: move this into a method of env
env.agent_knows_phi = False

for i in range(10):
    # timestamped filename: day_hour_minute
    filename = (str(time.localtime().tm_mday) + '_'
                + str(time.localtime().tm_hour) + '_'
                + str(time.localtime().tm_min))
    single_theta_inverse(arg, env, agent, filename,
                         number_updates=number_updates,
                         true_theta=None, phi=None, init_theta=None,
                         states=None, actions=None, trajectory_data=None,
                         use_H=False, tasks=None)
print('done')
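# For reference, a minimal sketch of what a conversion like
# policy_torch.copy_mlp_weights could look like; this is an illustration,
# not the repo's implementation. The weight-dict ordering, the 'pi'
# (actor) name filter, and the output size are assumptions.
import torch
import torch.nn as nn

def copy_mlp_weights_sketch(baselines_model, layers, n_inputs, n_outputs=2):
    # rebuild the actor MLP in torch with the trained layer sizes
    sizes = [n_inputs] + list(layers)
    modules = []
    for i in range(len(layers)):
        modules += [nn.Linear(sizes[i], sizes[i + 1]), nn.ReLU()]
    modules.append(nn.Linear(sizes[-1], n_outputs))
    net = nn.Sequential(*modules)
    # stable-baselines 2.x exposes trained weights as a name -> ndarray dict
    actor_params = [v for k, v in baselines_model.get_parameters().items()
                    if 'pi' in k]  # keep actor weights only (assumed naming)
    linears = [m for m in net if isinstance(m, nn.Linear)]
    with torch.no_grad():
        for layer, w, b in zip(linears, actor_params[0::2], actor_params[1::2]):
            layer.weight.copy_(torch.as_tensor(w.T))  # TF stores (in, out); torch wants (out, in)
            layer.bias.copy_(torch.as_tensor(b))
    return net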
# (trailing rows of a commented-out parameter tensor; the opening of the
#  call is missing from this snippet)
# [0.15],
# [1],
# [0.03],
# [0.03]]).cuda()

for i in range(1):
    filename = (str(time.localtime().tm_mday) + '_'
                + str(time.localtime().tm_hour) + '_'
                + str(time.localtime().tm_min))
    single_theta_inverse(arg, env, agent, filename,
                         number_updates=number_updates,
                         true_theta=None, phi=None, init_theta=None,
                         states=None, actions=None, trajectory_data=None,
                         use_H=False, tasks=None,
                         is1d=True, gpu=False,
                         # fixed_param_ind=[5,6],
                         # assign_true_param=[5,6],
                         # task=[torch.tensor([0.7]).cuda()],
                         )
print('done')
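# The day_hour_minute filename pattern above repeats across these scripts;
# a small helper (hypothetical, not in the repo) would keep the loops shorter:
import time

def timestamp_name(prefix=''):
    t = time.localtime()
    return '{}{}_{}_{}'.format(prefix, t.tm_mday, t.tm_hour, t.tm_min)

# usage: filename = timestamp_name()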
# agent: convert the trained baselines model to a torch model
import firefly_acc

baselines_mlp_model = TD3.load('trained_agent//acc_retrain_1000000_2_18_21_4.zip')
agent = policy_torch.copy_mlp_weights(baselines_mlp_model, layers=[128, 128], n_inputs=30)

# load the environment, same as in training
env = firefly_acc.FireflyAcc(arg)

# --- set up the env for inverse ---
# TODO: move this into a method of env
env.agent_knows_phi = False

for i in range(10):
    # filename encodes the run hyperparameters plus a day_hour_minute timestamp
    filename = ("test_acc_EP" + str(arg.NUM_EP)
                + "updates" + str(number_updates)
                + "lr" + str(arg.ADAM_LR)
                + 'step' + str(arg.LR_STEP)
                + str(time.localtime().tm_mday) + '_'
                + str(time.localtime().tm_hour) + '_'
                + str(time.localtime().tm_min))
    single_theta_inverse(arg, env, agent, filename,
                         number_updates=number_updates,
                         true_theta=None, phi=None, init_theta=None,
                         states=None, actions=None, tasks=None)
print('done')
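# Quick sanity check for the converted agent (a sketch; assumes
# copy_mlp_weights returns a torch module that maps the 30-dim input
# to an action vector, and that it accepts a single unbatched input):
import torch

obs = torch.zeros(30)   # placeholder input vector
with torch.no_grad():
    action = agent(obs)
print(action.shape)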
# (remainder of a truncated tensor literal of 10 parameter values; the
#  assignment and opening of the call are missing from this snippet)
    [[0.2207319438457489],
     [1.062300205230713],
     [0.32934996485710144],
     [0.1929050236940384],
     [0.19170257449150085],
     [0.1894093006849289],
     [0.16225792467594147],
     [0.05502069741487503],
     [0.6376186013221741],
     [0.7159334421157837]])

for i in range(1):
    filename = (str(time.localtime().tm_mday) + '_'
                + str(time.localtime().tm_hour) + '_'
                + str(time.localtime().tm_min))
    single_theta_inverse(arg, env, agent, filename,
                         number_updates=number_updates,
                         true_theta=None, phi=phi, init_theta=theta,
                         trajectory_data=(states, actions, tasks),
                         use_H=False, is1d=False, gpu=False,
                         # fixed_param_ind=[1,2,5,6],
                         # assign_true_param=[1,2,5,6],
                         action_var=0.001)
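# This run fits theta to pre-collected trajectories rather than fresh
# rollouts: trajectory_data=(states, actions, tasks) packs the observed
# data, and action_var fixes the assumed action noise. A minimal sketch
# of the packing; the shapes and names below are assumptions, not the
# repo's actual data loading:
import torch

states = [torch.zeros(10, 30)]        # one (T x state_dim) tensor per trial
actions = [torch.zeros(10, 2)]        # matching (T x action_dim) controls
tasks = [torch.tensor([0.7, 0.3])]    # per-trial task parameters (e.g. target position)
trajectory_data = (states, actions, tasks)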