import torch
from torch.optim import Adam

# QNetwork, CNNPolicy, and hard_update are this repo's own network and
# utility definitions; this __init__ belongs to the SAC agent class.
def __init__(self, num_inputs, action_space, param, action_bound):
    self.action_bound = action_bound
    self.alpha = param['alpha']
    self.gamma = param['gamma']
    self.tau = param['tau']
    self.target_update_interval = param['target_update_interval']
    self.automatic_entropy_tuning = param['automatic_entropy_tuning']
    self.lr = param['lr']
    self.device = torch.device("cuda" if param['cuda'] else "cpu")

    # Critic and its target copy, initialized to identical weights.
    self.critic = QNetwork(num_inputs, action_space).to(device=self.device)
    self.critic_optim = Adam(self.critic.parameters(), lr=self.lr)
    self.critic_target = QNetwork(num_inputs, action_space).to(self.device)
    hard_update(self.critic_target, self.critic)

    # Target entropy = -dim(A) (e.g. -6 for HalfCheetah-v2), as given in the SAC paper.
    if self.automatic_entropy_tuning is True:
        # action_space is assumed to be the action dimension (an int);
        # torch.Tensor(n) would return an *uninitialized* tensor of length n,
        # so the dimension must be wrapped in a list.
        self.target_entropy = -torch.prod(
            torch.Tensor([action_space]).to(self.device)).item()
        self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
        self.alpha_optim = Adam([self.log_alpha], lr=self.lr)

    self.policy = CNNPolicy(num_inputs, action_space).to(self.device)
    self.policy_optim = Adam(self.policy.parameters(), lr=self.lr)
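# The constructor above calls hard_update() to synchronize the target critic,
# and self.tau implies Polyak-averaged soft updates during training. A minimal
# sketch of both helpers, assuming the conventional SAC/DDPG definitions (the
# repo's actual utilities may differ):
def hard_update(target, source):
    # Copy source parameters into the target network verbatim.
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(param.data)

def soft_update(target, source, tau):
    # Polyak averaging: target <- tau * source + (1 - tau) * target.
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(tau * param.data + (1.0 - tau) * target_param.data)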
if os.path.exists(file_policy):
    print('###########################################')
    print('############Loading Policy Model###########')
    print('###########################################')
    state_dict = torch.load(file_policy)
    agent.policy.load_state_dict(state_dict)
else:
    print('###########################################')
    print('############Start policy Training###########')
    print('###########################################')

if os.path.exists(file_critic_1):
    print('###########################################')
    print('############Loading critic_1 Model###########')
    print('###########################################')
    state_dict = torch.load(file_critic_1)
    agent.critic_1.load_state_dict(state_dict)
    hard_update(agent.critic_1_target, agent.critic_1)
else:
    print('###########################################')
    print('############Start critic_1 Training###########')
    print('###########################################')

if os.path.exists(file_critic_2):
    print('###########################################')
    print('############Loading critic_2 Model###########')
    print('###########################################')
    state_dict = torch.load(file_critic_2)
    agent.critic_2.load_state_dict(state_dict)
    hard_update(agent.critic_2_target, agent.critic_2)
else:
    print('###########################################')
    print('############Start critic_2 Training###########')
    print('###########################################')
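# The three load-or-train branches above repeat the same pattern. A hedged
# refactoring sketch; load_checkpoint and its keyword names are illustrative
# helpers, not part of the repo:
import os
import torch

def load_checkpoint(path, network, target_network=None, name='model'):
    # Load weights into `network` if a checkpoint exists, mirror them into
    # the target network, and report what happened in the same banner style.
    print('###########################################')
    if os.path.exists(path):
        print('############Loading %s Model###########' % name)
        network.load_state_dict(torch.load(path))
        if target_network is not None:
            hard_update(target_network, network)
        loaded = True
    else:
        print('############Start %s Training###########' % name)
        loaded = False
    print('###########################################')
    return loaded

# Usage mirroring the original script:
# load_checkpoint(file_policy, agent.policy, name='Policy')
# load_checkpoint(file_critic_1, agent.critic_1, agent.critic_1_target, name='critic_1')
# load_checkpoint(file_critic_2, agent.critic_2, agent.critic_2_target, name='critic_2')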
# np.random.seed(1)
if rank == 0:
    policy_path = 'policy_0819'

    # actor
    actor = Actor(frames=LASER_HIST, action_space=2, max_action=MAX_ACTION)
    actor.cuda()
    actor_opt = Adam(actor.parameters(), lr=ACTOR_LEARNING_RATE)
    actor_target = Actor(frames=LASER_HIST, action_space=2, max_action=MAX_ACTION)
    actor_target.cuda()
    hard_update(actor_target, actor)

    # critic
    critic = Critic(frames=LASER_HIST, action_space=2)
    critic.cuda()
    critic_opt = Adam(critic.parameters(), lr=CRITIC_LEARNING_RATE)
    critic_target = Critic(frames=LASER_HIST, action_space=2)
    critic_target.cuda()
    hard_update(critic_target, critic)

    if not os.path.exists(policy_path):
        os.makedirs(policy_path)
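# The target copies built above are what stabilize the DDPG bootstrap target
# during training. A minimal sketch of one critic update step; the batch
# layout, the forward signatures actor_target(next_obs) / critic(obs, action),
# and the gamma/tau defaults are assumptions, not taken from this file:
import torch
import torch.nn.functional as F

def ddpg_critic_update(batch, gamma=0.99, tau=0.001):
    obs, action, reward, next_obs, done = batch  # assumed pre-built CUDA tensors
    with torch.no_grad():
        # Bootstrap with the *target* networks, not the live ones.
        next_action = actor_target(next_obs)
        target_q = reward + gamma * (1.0 - done) * critic_target(next_obs, next_action)
    critic_loss = F.mse_loss(critic(obs, action), target_q)
    critic_opt.zero_grad()
    critic_loss.backward()
    critic_opt.step()
    # Let the target critic slowly track the live one (Polyak averaging);
    # actor_target is updated the same way after the actor step.
    soft_update(critic_target, critic, tau)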