def act(self, state, explore=True):
    """Choose an action for ``state`` with ``self.actor``.

    The incoming ``state`` is deep-copied, ``self.prev_action`` is appended
    to ``state["low_dim_states"]``, and both ``"low_dim_states"`` and
    ``"social_vehicles"`` are turned into tensors (with ``unsqueeze(0)``)
    on ``self.device`` before being fed to the actor.

    Args:
        state: Mapping with NumPy arrays under ``"low_dim_states"`` and
            ``"social_vehicles"``.
        explore: When true, one sample from ``self.noise[0]`` and one from
            ``self.noise[1]`` are added to the first two action components.

    Returns:
        The result of ``to_3d_action`` applied to the clipped action.
    """
    state = copy.deepcopy(state)
    state["low_dim_states"] = np.float32(
        np.append(state["low_dim_states"], self.prev_action)
    )
    state["social_vehicles"] = (
        torch.from_numpy(state["social_vehicles"]).unsqueeze(0).to(self.device)
    )
    state["low_dim_states"] = (
        torch.from_numpy(state["low_dim_states"]).unsqueeze(0).to(self.device)
    )

    # Pure inference: no autograd graph is needed, and the actor must be
    # switched back to train mode even if the forward pass raises.
    self.actor.eval()
    try:
        with torch.no_grad():
            action = self.actor(state).detach().cpu().numpy().flatten()
    finally:
        self.actor.train()

    # Both noise sources are sampled on every call, whether or not the
    # samples are used, so their internal state advances each step.
    noise = [self.noise[0].sample(), self.noise[1].sample()]
    if explore:
        action[0] += noise[0]
        action[1] += noise[1]

    action_low = self.action_low.data.cpu().numpy()
    action_high = self.action_high.data.cpu().numpy()
    # NOTE(review): the trailing [0] presumably strips a leading dimension
    # carried by action_low/action_high -- confirm against their definition.
    action = np.clip(action, action_low, action_high)[0]
    return to_3d_action(action)
def act(self, state, explore=True):
    """Choose an action for ``state`` with ``self.actor``.

    ``state`` is first passed through ``self.state_preprocessor`` (with
    ``normalize=True`` and ``unsqueeze=True``), then fed to the actor.

    Args:
        state: Raw observation accepted by ``self.state_preprocessor``.
        explore: When true, one sample from ``self.noise[0]`` and one from
            ``self.noise[1]`` are added to the first two action components.

    Returns:
        The result of ``to_3d_action`` applied to the clipped action.
    """
    # Pure inference: no autograd graph is needed, and the actor must be
    # switched back to train mode even if preprocessing or the forward
    # pass raises.
    self.actor.eval()
    try:
        state = self.state_preprocessor(
            state=state,
            normalize=True,
            unsqueeze=True,
            device=self.device,
            social_capacity=self.social_capacity,
            observation_num_lookahead=self.observation_num_lookahead,
            social_vehicle_config=self.social_vehicle_config,
            prev_action=self.prev_action,
        )
        with torch.no_grad():
            action = self.actor(state).detach().cpu().numpy().flatten()
    finally:
        self.actor.train()

    # Both noise sources are sampled on every call, whether or not the
    # samples are used, so their internal state advances each step.
    noise = [self.noise[0].sample(), self.noise[1].sample()]
    if explore:
        action[0] += noise[0]
        action[1] += noise[1]

    action_low = self.action_low.data.cpu().numpy()
    action_high = self.action_high.data.cpu().numpy()
    # NOTE(review): the trailing [0] presumably strips a leading dimension
    # carried by action_low/action_high -- confirm against their definition.
    action = np.clip(action, action_low, action_high)[0]
    return to_3d_action(action)
def act(self, state, explore=True):
    """Pick an action from the distribution produced by ``self.ppo_net``.

    Args:
        state: Raw observation accepted by ``self.state_preprocessor``.
        explore: If true, the action is sampled and its log-probability and
            the network's value output are stored on ``self.current_log_prob``
            and ``self.current_value``. If false, ``dist.loc`` is used and
            neither attribute is touched.

    Returns:
        The result of ``to_3d_action`` applied to the squeezed action.

    ``self.step_count`` is incremented by one on every call.
    """
    processed = self.state_preprocessor(
        state=state,
        normalize=True,
        unsqueeze=True,
        device=self.device,
        social_capacity=self.social_capacity,
        observation_num_lookahead=self.observation_num_lookahead,
        social_vehicle_config=self.social_vehicle_config,
        prev_action=self.prev_action,
    )
    with torch.no_grad():
        dist, value = self.ppo_net(processed)

    if not explore:
        # Testing mode: use the distribution's location parameter.
        chosen = dist.loc
    else:
        # Training mode: sample, and remember what the update step needs.
        chosen = dist.sample()
        self.current_log_prob = dist.log_prob(chosen)
        self.current_value = value

    action = torch.squeeze(chosen).data.cpu().numpy()
    self.step_count += 1
    return to_3d_action(action)
def act(self, state, explore=True):
    """Choose an action for ``state`` with ``self.sac_net``.

    The incoming ``state`` is deep-copied, ``self.prev_action`` is appended
    to ``state["low_dim_states"]``, and both ``"low_dim_states"`` and
    ``"social_vehicles"`` are turned into tensors (with ``unsqueeze(0)``)
    on ``self.device`` before calling ``self.sac_net.sample``.

    Args:
        state: Mapping with NumPy arrays under ``"low_dim_states"`` and
            ``"social_vehicles"``.
        explore: If true, the first value returned by ``sample`` is used;
            otherwise the third one (bound to ``mean``) is used.

    Returns:
        The result of ``to_3d_action`` applied to the chosen action with
        its leading dimension squeezed out.
    """
    state = copy.deepcopy(state)
    state["low_dim_states"] = np.float32(
        np.append(state["low_dim_states"], self.prev_action)
    )
    state["social_vehicles"] = (
        torch.from_numpy(state["social_vehicles"]).unsqueeze(0).to(self.device)
    )
    state["low_dim_states"] = (
        torch.from_numpy(state["low_dim_states"]).unsqueeze(0).to(self.device)
    )

    # Action selection never back-propagates (the result is detached right
    # away), so skip building the autograd graph.
    with torch.no_grad():
        action, _, mean = self.sac_net.sample(state)

    # explore -> training mode (sampled action); otherwise testing mode (mean).
    chosen = action if explore else mean
    return to_3d_action(torch.squeeze(chosen, 0).detach().cpu().numpy())
def act(self, state, explore=True):
    """Choose an action for ``state`` with ``self.sac_net``.

    ``state`` is first passed through ``self.state_preprocessor`` (with
    ``normalize=True`` and ``unsqueeze=True``, on ``self.device_name``),
    then handed to ``self.sac_net.sample``.

    Args:
        state: Raw observation accepted by ``self.state_preprocessor``.
        explore: If true, the first value returned by ``sample`` is used;
            otherwise the third one (bound to ``mean``) is used.

    Returns:
        The result of ``to_3d_action`` applied to the chosen action with
        its leading dimension squeezed out.
    """
    state = self.state_preprocessor(
        state=state,
        normalize=True,
        unsqueeze=True,
        device=self.device_name,
        social_capacity=self.social_capacity,
        observation_num_lookahead=self.observation_num_lookahead,
        social_vehicle_config=self.social_vehicle_config,
        prev_action=self.prev_action,
    )

    # Action selection never back-propagates (the result is detached right
    # away), so skip building the autograd graph.
    with torch.no_grad():
        action, _, mean = self.sac_net.sample(state)

    # explore -> training mode (sampled action); otherwise testing mode (mean).
    chosen = action if explore else mean
    return to_3d_action(torch.squeeze(chosen, 0).detach().cpu().numpy())
def act(self, state, explore=True):
    """Pick an action from the distribution produced by ``self.ppo_net``.

    ``self.prev_action`` is appended to ``state["low_dim_states"]`` and both
    ``"low_dim_states"`` and ``"social_vehicles"`` are turned into tensors
    (with ``unsqueeze(0)``) on ``self.device``. This happens on a shallow
    copy, so the caller's ``state`` mapping is left untouched.

    Args:
        state: Mapping with NumPy arrays under ``"low_dim_states"`` and
            ``"social_vehicles"``.
        explore: If true, the action is sampled and its log-probability and
            the network's value output are stored on ``self.current_log_prob``
            and ``self.current_value``. If false, ``dist.loc`` is used and
            neither attribute is touched.

    Returns:
        The result of ``to_3d_action`` applied to the squeezed action.

    ``self.step_count`` is incremented by one on every call.
    """
    # Only top-level keys are rebound below and the arrays themselves are
    # never written to, so a shallow copy is enough to protect the caller's
    # observation (which would otherwise end up holding tensors with
    # prev_action appended).
    state = dict(state)
    state["low_dim_states"] = np.float32(
        np.append(state["low_dim_states"], self.prev_action)
    )
    state["social_vehicles"] = (
        torch.from_numpy(state["social_vehicles"]).unsqueeze(0).to(self.device)
    )
    state["low_dim_states"] = (
        torch.from_numpy(state["low_dim_states"]).unsqueeze(0).to(self.device)
    )

    with torch.no_grad():
        dist, value = self.ppo_net(x=state)

    if explore:  # training mode
        action = dist.sample()
        self.current_log_prob = dist.log_prob(action)
        self.current_value = value
        action = torch.squeeze(action).data.cpu().numpy()
    else:  # testing mode
        action = torch.squeeze(dist.loc).data.cpu().numpy()

    self.step_count += 1
    return to_3d_action(action)