def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
    """Execute one action and return the resulting RL step.

    The END action terminates the episode, grading success by whether the
    goal object is currently visible; every other action is forwarded to
    the environment. When an action fails and a visibility-location cache
    exists, the navigation graph is updated so it reflects the failure.
    """
    assert isinstance(action, int)
    action = cast(int, action)

    chosen_action = self.class_action_names()[action]
    if chosen_action == END:
        # Agent declared the episode over; success == goal object in view.
        self._took_end_action = True
        self._success = self._is_goal_object_visible()
        self.last_action_success = self._success
    else:
        self.env.step({"action": chosen_action})
        self.last_action_success = self.env.last_action_success

    action_failed = not self.last_action_success
    if action_failed and self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE is not None:
        # Keep the cached graph consistent with the observed failure.
        self.env.update_graph_with_failed_action(failed_action=chosen_action)

    return RLStepResult(
        observation=self.get_observations(),
        reward=self.judge(),
        done=self.is_done(),
        info={"last_action_success": self.last_action_success},
    )
def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
    """Advance the task by one action while tracking the agent's path.

    END finishes the episode with success judged by `_is_goal_in_range`;
    any other action is forwarded to the environment, after which the
    agent's pose is appended to both `self.path` (x/y/z only) and
    `task_info["followed_path"]`. A move counter is incremented whenever
    the latest recorded position differs from the previous one.
    """
    assert isinstance(action, int)
    action = cast(int, action)

    act_name = self.action_names()[action]
    if act_name == END:
        self._took_end_action = True
        self._success = self._is_goal_in_range()
        self.last_action_success = self._success
    else:
        self.env.step({"action": act_name})
        self.last_action_success = self.env.last_action_success
        agent_pose = self.env.agent_state()
        self.path.append({axis: agent_pose[axis] for axis in ["x", "y", "z"]})
        self.task_info["followed_path"].append(agent_pose)

    # Only positions that actually changed count as moves.
    if len(self.path) > 1 and self.path[-1] != self.path[-2]:
        self.num_moves_made += 1

    return RLStepResult(
        observation=self.get_observations(),
        reward=self.judge(),
        done=self.is_done(),
        info={
            "last_action_success": self.last_action_success,
            "action": action,
        },
    )
def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
    """Forward the action to the wrapped MiniGrid env and package the result.

    Success is recorded when the episode ends with a positive reward
    (MiniGrid returns reward > 0 only on reaching the goal — presumably;
    verify against the wrapped env's reward scheme).
    """
    assert isinstance(action, int)
    action = cast(int, action)

    grid_obs, reward, done, info = self.env.step(action=action)
    self._last_action = action
    self._was_successful = done and reward > 0

    return RLStepResult(
        observation=self.get_observations(minigrid_output_obs=grid_obs),
        reward=reward,
        done=self.is_done(),
        info=info,
    )
def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
    """Execute one action, recording the trajectory and invalid actions.

    Every action — END included — is forwarded to the environment; END
    additionally terminates the episode with success determined by
    `_is_goal_in_range`. An action that leaves the agent's position and
    rotation unchanged is counted as invalid, and the post-step pose is
    appended to `self._positions`.
    """
    assert isinstance(action, int)
    action = cast(int, action)

    old_pose = self.get_observations()["agent_position_and_rotation"]
    action_str = self.action_names()[action]
    self._actions_taken.append(action_str)

    self.env.step({"action": action_str})

    if action_str == END:
        self._took_end_action = True
        self._success = self._is_goal_in_range()
        self.last_action_success = self._success
    else:
        self.last_action_success = self.env.last_action_success

    step_result = RLStepResult(
        observation=self.get_observations(),
        reward=self.judge(),
        done=self.is_done(),
        info={"last_action_success": self.last_action_success},
    )

    # NOTE(review): a pose-preserving action (END, or a blocked move the env
    # reports as successful) is counted as invalid here — confirm intended.
    new_pose = self.get_observations()["agent_position_and_rotation"]
    if np.all(old_pose == new_pose):
        self._num_invalid_actions += 1

    # Reuse the pose fetched above instead of a redundant third
    # get_observations() call (no state changes between the two calls).
    self._positions.append(
        {
            "x": new_pose[0],
            "y": new_pose[1],
            "z": new_pose[2],
            "rotation": new_pose[3],
        }
    )

    return step_result
def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
    """Translate the task action index to MiniGrid's and step the env once.

    The raw MiniGrid done flag is stashed on `self._minigrid_done`; the
    episode's actual termination is still decided by `self.is_done()`.
    """
    assert isinstance(action, int)
    action = cast(int, action)

    minigrid_action = self._ACTION_IND_TO_MINIGRID_IND[action]
    grid_obs, reward, self._minigrid_done, info = self.env.step(
        action=minigrid_action
    )

    return RLStepResult(
        observation=self.get_observations(minigrid_output_obs=grid_obs),
        reward=reward,
        done=self.is_done(),
        info=info,
    )
def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
    """Step the grid-world environment and compute the shaped reward.

    Every step costs STEP_PENALTY; reaching the goal position additionally
    grants FOUND_TARGET_REWARD. If the step budget runs out without
    reaching the target, the final reward is the closed-form discounted
    sum of endless step penalties, STEP_PENALTY / (1 - DISCOUNT_FACTOR).
    """
    assert isinstance(action, int)
    action = cast(int, action)

    # The env's return value (move success) was previously bound to an
    # unused local; the reward depends only on position, so it is dropped.
    self.env.step(action)

    reward = STEP_PENALTY
    if np.all(self.env.current_position == self.env.goal_position):
        self._found_target = True
        reward += FOUND_TARGET_REWARD
    elif self.num_steps_taken() == self.max_steps - 1:
        reward = STEP_PENALTY / (1 - DISCOUNT_FACTOR)

    return RLStepResult(
        observation=self.get_observations(),
        reward=reward,
        done=self.is_done(),
        info=None,
    )
def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
    """Forward the chosen action to the environment and report the result.

    Every action, including END, is sent to the environment; END also
    terminates the episode, with success determined by `_is_goal_in_range`.
    """
    assert isinstance(action, int)
    action = cast(int, action)

    chosen = self.class_action_names()[action]
    self.env.step({"action": chosen})

    if chosen == END:
        self._took_end_action = True
        self._success = self._is_goal_in_range()
        self.last_action_success = self._success
    else:
        self.last_action_success = self.env.last_action_success

    return RLStepResult(
        observation=self.get_observations(),
        reward=self.judge(),
        done=self.is_done(),
        info={"last_action_success": self.last_action_success},
    )