def act(self, state):
    """Choose an env action: random during warm-up, otherwise from the policy.

    Continuous actions are squashed with tanh and rescaled to the env's
    action bounds; discrete actions are returned as produced by the policy.
    Returns a numpy array (squeezed, moved to CPU).
    """
    # Warm-up phase: explore with random actions until training begins.
    if self.body.env.clock.frame < self.training_start_step:
        rand_action = policy_util.random(state, self, self.body)
        return rand_action.cpu().squeeze().numpy()
    action = self.action_policy(state, self, self.body)
    if not self.body.is_discrete:
        # Bound the continuous action to [-1, 1] via tanh, then scale to env range.
        action = self.scale_action(torch.tanh(action))
    return action.cpu().squeeze().numpy()
def act(self, state):
    """Choose an env action: random during warm-up, otherwise from the policy.

    Discrete policy output is a RelaxedOneHotCategorical probability vector,
    so it is sampled down to an integer action; continuous output is bounded
    with tanh. Returns a numpy array (squeezed, moved to CPU).
    """
    frame = self.body.env.clock.frame
    # Warm-up phase: explore with random actions until training begins.
    if frame < self.training_start_step:
        rand_action = policy_util.random(state, self, self.body)
        return rand_action.cpu().squeeze().numpy()
    action = self.action_policy(state, self, self.body)
    if self.body.is_discrete:
        # discrete output is RelaxedOneHotCategorical, need to sample to int
        action = torch.distributions.Categorical(probs=action).sample()
    else:
        # Bound the continuous action to [-1, 1] via tanh.
        action = torch.tanh(action)
    return action.cpu().squeeze().numpy()