Example #1
# convert the trained stable-baselines agent to a torch model
import time

import policy_torch
import firefly_accac  # assumed project-local import for the env created below
from stable_baselines import TD3  # assumed import; the agent is a saved TD3 .zip

# arg, number_updates and single_theta_inverse come from the surrounding project script
baselines_mlp_model = TD3.load('trained_agent/accac_final_1000000_9_11_20_25.zip')
agent = policy_torch.copy_mlp_weights(baselines_mlp_model, layers=[512, 512], n_inputs=32)

# load the environment, same as in training
env = firefly_accac.FireflyAccAc(arg)
# --- set up the env for the inverse problem ---
# TODO: move this into a method of the env
env.agent_knows_phi = False


for i in range(10):
    filename = (str(time.localtime().tm_mday) + '_' +
                str(time.localtime().tm_hour) + '_' +
                str(time.localtime().tm_min))
    single_theta_inverse(arg, env, agent, filename,
                         number_updates=number_updates,
                         true_theta=None,
                         phi=None,
                         init_theta=None,
                         states=None,
                         actions=None,
                         trajectory_data=None,
                         use_H=False,
                         tasks=None)
print('done')
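
The day_hour_minute file tag is rebuilt by hand in several of these examples; a small helper (hypothetical, with equivalent output) keeps it in one place:

import time

def run_stamp():
    # unpadded 'day_hour_minute' tag, same as the concatenation in the loop above
    t = time.localtime()
    return f'{t.tm_mday}_{t.tm_hour}_{t.tm_min}'

filename = run_stamp()  # e.g. '9_11_20'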


Example #2
import time  # assumed; the snippet begins mid-script (arg, env, agent, number_updates are defined earlier)

for i in range(1):
    filename = (str(time.localtime().tm_mday) + '_' +
                str(time.localtime().tm_hour) + '_' +
                str(time.localtime().tm_min))
    single_theta_inverse(
        arg,
        env,
        agent,
        filename,
        number_updates=number_updates,
        true_theta=None,
        phi=None,
        init_theta=None,
        states=None,
        actions=None,
        trajectory_data=None,
        use_H=False,
        tasks=None,
        is1d=True,
        gpu=False
        # fixed_param_ind=[5,6],
        # assign_true_param=[5,6],
        #     task=[torch.tensor([0.7]).cuda()],
    )
print('done')
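
The commented-out task line pins its tensor to CUDA even though the call passes gpu=False; a device-agnostic construction (a sketch using only standard PyTorch) avoids that mismatch:

import torch

use_gpu = False  # mirror the gpu= flag passed to single_theta_inverse
device = torch.device('cuda' if use_gpu and torch.cuda.is_available() else 'cpu')
task = [torch.tensor([0.7], device=device)]  # same value as the commented-out line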
Example #3
# TD3.load(...) reconstructed here; the snippet originally began mid-call
baselines_mlp_model = TD3.load(
    'trained_agent//acc_retrain_1000000_2_18_21_4.zip')
agent = policy_torch.copy_mlp_weights(baselines_mlp_model,
                                      layers=[128, 128],
                                      n_inputs=30)

# load the environment, same as in training
env = firefly_acc.FireflyAcc(arg)
# --- set up the env for the inverse problem ---
# TODO: move this into a method of the env
env.agent_knows_phi = False

for i in range(10):
    filename = ("test_acc_EP" + str(arg.NUM_EP) + "updates" +
                str(number_updates) + "lr" + str(arg.ADAM_LR) + 'step' +
                str(arg.LR_STEP) + str(time.localtime().tm_mday) + '_' +
                str(time.localtime().tm_hour) + '_' +
                str(time.localtime().tm_min))
    single_theta_inverse(arg,
                         env,
                         agent,
                         filename,
                         number_updates=number_updates,
                         true_theta=None,
                         phi=None,
                         init_theta=None,
                         states=None,
                         actions=None,
                         tasks=None)

print('done')
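
policy_torch.copy_mlp_weights is project-local and not shown in these snippets; the sketch below illustrates what such a helper could look like (hypothetical name and layout, assuming the baselines weights were already extracted as (weight, bias) numpy pairs in torch's (out_features, in_features) layout):

import torch
import torch.nn as nn

def copy_mlp_weights_sketch(param_pairs, layers=(128, 128), n_inputs=30, n_outputs=2):
    # build a plain two-hidden-layer MLP and copy the extracted weights into it
    net = nn.Sequential(
        nn.Linear(n_inputs, layers[0]), nn.ReLU(),
        nn.Linear(layers[0], layers[1]), nn.ReLU(),
        nn.Linear(layers[1], n_outputs), nn.Tanh(),
    )
    linear_layers = [m for m in net if isinstance(m, nn.Linear)]
    with torch.no_grad():
        for layer, (w, b) in zip(linear_layers, param_pairs):
            layer.weight.copy_(torch.as_tensor(w, dtype=torch.float32))
            layer.bias.copy_(torch.as_tensor(b, dtype=torch.float32))
    return net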
Example #4
import time   # assumed; the snippet begins mid-script
import torch  # assumed; phi, states, actions and tasks are defined earlier in the script

# the tensor below feeds init_theta in the call that follows,
# so it is reconstructed here under the name theta
theta = torch.tensor(
    [[0.2207319438457489],
     [1.062300205230713],
     [0.32934996485710144],
     [0.1929050236940384],
     [0.19170257449150085],
     [0.1894093006849289],
     [0.16225792467594147],
     [0.05502069741487503],
     [0.6376186013221741],
     [0.7159334421157837]])

for i in range(1):
    filename = (str(time.localtime().tm_mday) + '_' +
                str(time.localtime().tm_hour) + '_' +
                str(time.localtime().tm_min))
    single_theta_inverse(arg, env, agent, filename,
                         number_updates=number_updates,
                         true_theta=None,
                         phi=phi,
                         init_theta=theta,
                         trajectory_data=(states, actions, tasks),
                         use_H=False,
                         is1d=False,
                         gpu=False,
                         # fixed_param_ind=[1,2,5,6],
                         # assign_true_param=[1,2,5,6],
                         action_var=0.001)
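
Across the four calls, single_theta_inverse is invoked with the following keyword arguments; the stub below only records that observed interface (defaults are guesses, not the project's definition):

def single_theta_inverse(arg, env, agent, filename,
                         number_updates=None,
                         true_theta=None, phi=None, init_theta=None,
                         states=None, actions=None, tasks=None,
                         trajectory_data=None,
                         use_H=False, is1d=False, gpu=False,
                         action_var=None, task=None,
                         fixed_param_ind=None, assign_true_param=None):
    ...  # the real implementation is part of the project, not these snippets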