Example #1
    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        assert isinstance(action, int)
        action = cast(int, action)

        action_str = self.action_names()[action]

        if action_str == END:
            self._took_end_action = True
            self._success = self._is_goal_in_range()
            self.last_action_success = self._success
        else:
            self.env.step({"action": action_str})
            self.last_action_success = self.env.last_action_success
            pose = self.env.agent_state()
            self.path.append({k: pose[k] for k in ["x", "y", "z"]})
            self.task_info["followed_path"].append(pose)
        if len(self.path) > 1:
            self.travelled_distance += IThorEnvironment.position_dist(
                p0=self.path[-1], p1=self.path[-2], ignore_y=True)
        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={
                "last_action_success": self.last_action_success,
                "action": action
            },
        )
        return step_result
Example #2
    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        assert isinstance(action, int)
        action = cast(int, action)

        action_str = self.class_action_names()[action]

        if action_str == END:
            self._took_end_action = True
            self._success = self.is_goal_object_visible()
            self.last_action_success = self._success
        else:
            self.env.step({"action": action_str})
            self.last_action_success = self.env.last_action_success

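            # If the move failed and a cached visibility graph is available, mark
            # this transition as invalid so (presumably) the shortest-path expert
            # avoids suggesting it again.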
            if (
                not self.last_action_success
            ) and self._CACHED_LOCATIONS_FROM_WHICH_OBJECT_IS_VISIBLE is not None:
                self.env.update_graph_with_failed_action(failed_action=action_str)

            self.task_info["followed_path"].append(self.env.get_agent_location())

        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success},
        )
        return step_result
Example #3
    def _step(self, action: Tuple[int, int]) -> RLStepResult:
        assert isinstance(action, tuple)
        action_str1 = self.action_names()[action[0]]
        action_str2 = self.action_names()[action[1]]

        self.env.step({"action": action_str1, "agentId": 0})
        self.last_action_success1 = self.env.last_action_success
        self.env.step({"action": action_str2, "agentId": 1})
        self.last_action_success2 = self.env.last_action_success

        pose1 = self.env.agent_state(0)
        self.task_info["followed_path1"].append(pose1)
        pose2 = self.env.agent_state(1)
        self.task_info["followed_path2"].append(pose2)

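        # Geodesic distance between the two agents, looked up through the scene's
        # distance cache (presumably recomputed via distance_from_point_to_point
        # on cache misses).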
        self.last_geodesic_distance = self.env.distance_cache.find_distance(
            self.env.scene_name,
            {k: pose1[k] for k in ["x", "y", "z"]},
            {k: pose2[k] for k in ["x", "y", "z"]},
            self.env.distance_from_point_to_point,
        )

        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={
                "last_action_success": [
                    self.last_action_success1,
                    self.last_action_success2,
                ],
                "action": action,
            },
        )
        return step_result
Example #4
    def _step(self, action: int) -> RLStepResult:

        action_str = self.class_action_names()[action]

        self._last_action_str = action_str
        action_dict = {"action": action_str}
        object_id = self.task_info["objectId"]
        if action_str == PICKUP:
            action_dict = {**action_dict, "object_id": object_id}
        self.env.step(action_dict)
        self.last_action_success = self.env.last_action_success

        last_action_name = self._last_action_str
        last_action_success = float(self.last_action_success)
        self.action_sequence_and_success.append((last_action_name, last_action_success))
        self.visualize(last_action_name)

        # If the object has not been marked as picked up yet, check whether it was picked up during this step and update the bookkeeping used by the reward
        if not self.object_picked_up:
            if (
                object_id
                in self.env.controller.last_event.metadata["arm"]["pickupableObjects"]
            ):
                self.env.step(dict(action="PickupObject"))
                # We issue an extra PickupObject pass here; the pickupable label is not always right, and if the pickup fails we may end up doing it twice
                object_inventory = self.env.controller.last_event.metadata["arm"][
                    "heldObjects"
                ]
                if len(object_inventory) > 0 and object_id not in object_inventory:
                    self.env.step(dict(action="ReleaseObject"))

            if self.env.is_object_at_low_level_hand(object_id):
                self.object_picked_up = True
                self.eplen_pickup = (
                    self._num_steps_taken + 1
                )  # plus one because this step has not been counted yet

        if self.object_picked_up:

            object_state = self.env.get_object_by_id(object_id)
            goal_state = self.task_info["target_location"]
            goal_achieved = self.object_picked_up and self.obj_state_aproximity(
                object_state, goal_state
            )
            if goal_achieved:
                self._took_end_action = True
                self.last_action_success = goal_achieved
                self._success = goal_achieved

        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success},
        )
        return step_result
Example #5
    def _step(self, action: Sequence[float]) -> RLStepResult:
        action = np.array(action)

        gym_obs, reward, self._gym_done, info = self.env.step(action=action)

        return RLStepResult(
            observation=self.get_observations(gym_obs=gym_obs),
            reward=reward,
            done=self.is_done(),
            info=info,
        )
Example #6
    def step(self, action: int) -> RLStepResult:
        step_result = super().step(action=action)
        if self.greedy_expert is not None:
            self.greedy_expert.update(
                action_taken=action,
                action_success=step_result.info["action_success"],
            )
        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=step_result.reward,
            done=step_result.done,
            info=step_result.info,
        )
        return step_result
Example #7
    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:

        assert isinstance(action, int)
        action = cast(int, action)

        action_str = self.action_names()[action]

        if action_str == END:
            self._took_end_action = True
            self._success = self._is_goal_in_range()
            self.last_action_success = self._success
        elif action_str in [
                DIRECTIONAL_AHEAD_PUSH, DIRECTIONAL_BACK_PUSH,
                DIRECTIONAL_RIGHT_PUSH, DIRECTIONAL_LEFT_PUSH
        ]:
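            # Assumption: the four directional push actions sit at indices 5-8 of
            # action_names(), so (action - 5) selects the push angle; 0.001 rather
            # than 0 presumably keeps the "ahead" push angle strictly positive.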
            angle = [0.001, 180, 90, 270][action - 5]
            obj = self.env.moveable_closest_obj_by_types(
                self.task_info["obstacles_types"])
            if obj is not None:
                self.env.step({
                    "action": action_str,
                    "objectId": obj["objectId"],
                    "moveMagnitude": obj["mass"] * 100,
                    "pushAngle": angle,
                    "autoSimulation": False
                })
                self.last_action_success = self.env.last_action_success
            else:
                self.last_action_success = False
        elif action_str in [LOOK_UP, LOOK_DOWN]:
            self.env.step({"action": action_str})
            self.last_action_success = self.env.last_action_success
        else:
            self.env.step({"action": action_str})
            self.last_action_success = self.env.last_action_success
            pose = self.env.agent_state()
            self.path.append({k: pose[k] for k in ["x", "y", "z"]})
            self.task_info["followed_path"].append(pose)
        if len(self.path) > 1 and self.path[-1] != self.path[-2]:
            self.num_moves_made += 1
        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={
                "last_action_success": self.last_action_success,
                "action": action
            },
        )

        return step_result
Example #8
    def _step(self, action: int) -> RLStepResult:
        assert isinstance(action, int)

        minigrid_obs, reward, done, info = self.env.step(action=action)
        self._last_action = action

        self._was_successful = done and reward > 0

        return RLStepResult(
            observation=self.get_observations(
                minigrid_output_obs=minigrid_obs),
            reward=reward,
            done=self.is_done(),
            info=info,
        )
Example #9
    def _step(self, action: int) -> RLStepResult:
        """Take a step in the task.

        # Parameters
        action: The index of the action from self.action_names().
        """
        # parse the action data
        action_name = self.action_names()[action]

        if action_name.startswith("pickup"):
            # Don't allow the exploration agent to pickup objects
            action_success = False

        elif action_name.startswith("open_by_type"):
            # Don't allow the exploration agent to open objects
            action_success = False

        elif action_name.startswith(
            ("move", "rotate", "look", "stand", "crouch")):
            # take the movement action
            action_success = getattr(self.walkthrough_env, action_name)()

        elif action_name == "drop_held_object_with_snap":
            # Don't allow the exploration agent to drop objects (not that it can hold any)
            action_success = False

        elif action_name == "done":
            self._took_end_action = True
            action_success = True

        else:
            raise RuntimeError(
                f"Action '{action_name}' is not in the action space {RearrangeActionSpace}"
            )

        self.actions_taken.append(action_name)
        self.actions_taken_success.append(action_success)

        return RLStepResult(
            observation=self.get_observations(),
            reward=self._judge(action_name=action_name,
                               action_success=action_success),
            done=self.is_done(),
            info={
                "action_name": action_name,
                "action_success": action_success
            },
        )
Example #10
    def _step(self, action: int) -> RLStepResult:
        assert isinstance(action, int)
        action = cast(int, action)

        minigrid_obs, reward, self._minigrid_done, info = self.env.step(
            action=self._ACTION_IND_TO_MINIGRID_IND[action])

        # self.env.render()

        return RLStepResult(
            observation=self.get_observations(
                minigrid_output_obs=minigrid_obs),
            reward=reward,
            done=self.is_done(),
            info=info,
        )
Example #11
    def _step(self, action: Dict[str, int]) -> RLStepResult:
        assert len(action) == 2, "got action={}".format(action)
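        # The two-part action is flattened into a single index (assuming
        # action["lower"] is binary) before being mapped to a MiniGrid action.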
        minigrid_obs, reward, self._minigrid_done, info = self.env.step(
            action=(
                self._ACTION_IND_TO_MINIGRID_IND[action["lower"] + 2 * action["higher"]]
            )
        )

        # self.env.render()

        return RLStepResult(
            observation=self.get_observations(minigrid_output_obs=minigrid_obs),
            reward=reward,
            done=self.is_done(),
            info=info,
        )
Example #12
    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        assert isinstance(action, int)
        action = cast(int, action)

        old_pos = self.get_observations()["agent_position_and_rotation"]

        action_str = self.action_names()[action]
        self._actions_taken.append(action_str)

        self.env.step({"action": action_str})

        # if action_str != END:
        #     self.env.step({"action": action_str})

        # if self.env.env.get_metrics()['distance_to_goal'] <= 0.2:
        #     self._took_end_action = True
        #     self._success = self.env.env.get_metrics()['distance_to_goal'] <= 0.2
        #     self.last_action_success = self._success
        # else:
        #     self.last_action_success = self.env.last_action_success

        if action_str == END:
            self._took_end_action = True
            self._success = self._is_goal_in_range()
            self.last_action_success = self._success
        else:
            self.last_action_success = self.env.last_action_success

        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success},
        )
        new_pos = self.get_observations()["agent_position_and_rotation"]
        if np.all(old_pos == new_pos):
            self._num_invalid_actions += 1

        pos = self.get_observations()["agent_position_and_rotation"]
        self._positions.append({
            "x": pos[0],
            "y": pos[1],
            "z": pos[2],
            "rotation": pos[3]
        })

        return step_result
Example #13
    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        assert isinstance(action, int)
        action = cast(int, action)

        self.env.step(action)
        reward = STEP_PENALTY

        if np.all(self.env.current_position == self.env.goal_position):
            self._found_target = True
            reward += FOUND_TARGET_REWARD
        elif self.num_steps_taken() == self.max_steps - 1:
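            # On timeout, charge the discounted sum of an infinite run of step
            # penalties: the sum over t >= 0 of DISCOUNT_FACTOR**t * STEP_PENALTY
            # equals STEP_PENALTY / (1 - DISCOUNT_FACTOR).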
            reward = STEP_PENALTY / (1 - DISCOUNT_FACTOR)

        return RLStepResult(
            observation=self.get_observations(),
            reward=reward,
            done=self.is_done(),
            info=None,
        )
Example #14
    def _step(self, action: int) -> RLStepResult:

        action_str = self.class_action_names()[action]

        self._last_action_str = action_str
        action_dict = {"action": action_str}
        object_id = self.task_info["objectId"]
        if action_str == PICKUP:
            action_dict = {**action_dict, "object_id": object_id}
        self.env.step(action_dict)
        self.last_action_success = self.env.last_action_success

        last_action_name = self._last_action_str
        last_action_success = float(self.last_action_success)
        self.action_sequence_and_success.append(
            (last_action_name, last_action_success))
        self.visualize(last_action_name)

        # If the object has not been marked as picked up yet, check whether it was picked up during this step and update the bookkeeping used by the reward
        if not self.object_picked_up:

            if self.env.is_object_at_low_level_hand(object_id):
                self.object_picked_up = True
                self.eplen_pickup = (
                    self._num_steps_taken + 1
                )  # plus one because this step has not been counted yet

        if action_str == DONE:
            self._took_end_action = True
            object_state = self.env.get_object_by_id(object_id)
            goal_state = self.task_info["target_location"]
            goal_achieved = self.object_picked_up and self.obj_state_aproximity(
                object_state, goal_state)
            self.last_action_success = goal_achieved
            self._success = goal_achieved

        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success},
        )
        return step_result
Example #15
    def _step(self, action: Union[int, Sequence[int]]) -> RLStepResult:
        assert isinstance(action, int)
        action = cast(int, action)

        action_str = self.class_action_names()[action]

        self.env.step({"action": action_str})

        if action_str == END:
            self._took_end_action = True
            self._success = self._is_goal_in_range()
            self.last_action_success = self._success
        else:
            self.last_action_success = self.env.last_action_success

        step_result = RLStepResult(
            observation=self.get_observations(),
            reward=self.judge(),
            done=self.is_done(),
            info={"last_action_success": self.last_action_success},
        )
        return step_result
Example #16
    def _step(self, action: int) -> RLStepResult:
        """
        action: The index of the action from self.action_names().
        """
        # parse the action data
        action_name = self.action_names()[action]

        if action_name.startswith("pickup"):
            # NOTE: since object ids are not kept in the metadata (for speed),
            # objects cannot be targeted with interaction actions directly. Hence
            # we reset the object filter before targeting by object id.

            with include_object_data(self.unshuffle_env.controller):
                metadata = self.unshuffle_env.last_event.metadata

                if len(metadata["inventoryObjects"]) != 0:
                    action_success = False
                else:
                    object_type = stringcase.pascalcase(
                        action_name.replace("pickup_", ""))
                    possible_objects = [
                        o for o in metadata["objects"]
                        if o["visible"] and o["objectType"] == object_type
                    ]

                    possible_objects = sorted(
                        possible_objects,
                        key=lambda po: (po["distance"], po["name"]),
                    )
                    object_before = None
                    if len(possible_objects) > 0:
                        object_before = possible_objects[0]
                        object_id = object_before["objectId"]

                    if object_before is not None:
                        self.unshuffle_env.controller.step(
                            "PickupObject",
                            objectId=object_id,
                            **self.unshuffle_env.physics_step_kwargs,
                        )
                        action_success = self.unshuffle_env.last_event.metadata[
                            "lastActionSuccess"]
                    else:
                        action_success = False

                    if action_success and self.unshuffle_env.held_object is None:
                        get_logger().warning(
                            f"`PickupObject` was successful in picking up {object_id} but we're not holding"
                            f" any objects! Current task spec:\n{self.unshuffle_env.current_task_spec}."
                        )
                        action_success = False

        elif action_name.startswith("open_by_type"):
            object_type = stringcase.pascalcase(
                action_name.replace("open_by_type_", ""))
            with include_object_data(self.unshuffle_env.controller):

                obj_name_to_goal_and_cur_poses = {
                    cur_pose["name"]: (goal_pose, cur_pose)
                    for _, goal_pose, cur_pose in zip(
                        *self.unshuffle_env.poses)
                }

                goal_pose = None
                cur_pose = None
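                # Find a visible, openable object of the requested type whose
                # current pose still differs from its goal pose.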
                for o in self.unshuffle_env.last_event.metadata["objects"]:
                    if (o["visible"] and o["objectType"] == object_type
                            and o["openable"]
                            and not self.unshuffle_env.are_poses_equal(
                                *obj_name_to_goal_and_cur_poses[o["name"]])):
                        goal_pose, cur_pose = obj_name_to_goal_and_cur_poses[
                            o["name"]]
                        break

                if goal_pose is not None:
                    object_id = cur_pose["objectId"]
                    goal_openness = goal_pose["openness"]

                    if cur_pose["openness"] > 0.0:
                        self.unshuffle_env.controller.step(
                            "CloseObject",
                            objectId=object_id,
                            **self.unshuffle_env.physics_step_kwargs,
                        )

                    self.unshuffle_env.controller.step(
                        "OpenObject",
                        objectId=object_id,
                        openness=goal_openness,
                        **self.unshuffle_env.physics_step_kwargs,
                    )
                    action_success = self.unshuffle_env.last_event.metadata[
                        "lastActionSuccess"]
                else:
                    action_success = False

        elif action_name.startswith(
            ("move", "rotate", "look", "stand", "crouch")):
            # Apply the action only to the unshuffle env; the walkthrough agent's
            # position must now be managed by whichever sensor is trying to read data from it.
            action_success = getattr(self.unshuffle_env, action_name)()
        elif action_name == "drop_held_object_with_snap":
            action_success = getattr(self.unshuffle_env, action_name)()
        elif action_name == "done":
            self._took_end_action = True
            action_success = True
        elif action_name == "pass":
            action_success = True
        else:
            raise RuntimeError(
                f"Action '{action_name}' is not in the action space {RearrangeActionSpace}"
            )

        self.actions_taken.append(action_name)
        self.actions_taken_success.append(action_success)
        if self.task_spec_in_metrics:
            self.agent_locs.append(self.unshuffle_env.get_agent_location())
        return RLStepResult(
            observation=None,
            reward=self._judge(),
            done=self.is_done(),
            info={
                "action_name": action_name,
                "action_success": action_success
            },
        )
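
Every _step above does the same three things: translate the integer (or tuple) action into an action name, either forward it to the environment or handle the episode-ending action, and finally pack observation, reward, done and info into an RLStepResult. The snippet below is a minimal, self-contained sketch of that pattern, not code from any of the projects quoted above: RLStepResult is redefined here as a stand-in NamedTuple with the same field names, and ToyNavTask, its action names, reward values and goal check are hypothetical placeholders.

from typing import Any, Dict, NamedTuple, Optional


class RLStepResult(NamedTuple):
    # Stand-in with the same fields as the RLStepResult used in the examples above.
    observation: Optional[Any]
    reward: float
    done: bool
    info: Optional[Dict[str, Any]]


END = "End"
ACTION_NAMES = ("MoveAhead", "RotateLeft", "RotateRight", END)  # hypothetical action set


class ToyNavTask:
    """Minimal illustration of the discrete-action _step pattern."""

    def __init__(self, max_steps: int = 10) -> None:
        self.max_steps = max_steps
        self.num_steps = 0
        self._took_end_action = False
        self._success = False
        self.last_action_success = True

    def action_names(self):
        return ACTION_NAMES

    def is_done(self) -> bool:
        return self._took_end_action or self.num_steps >= self.max_steps

    def _is_goal_in_range(self) -> bool:
        return False  # placeholder for a real success check

    def _step(self, action: int) -> RLStepResult:
        self.num_steps += 1
        action_str = self.action_names()[action]
        if action_str == END:
            # Episode-ending action: success is decided by the goal check.
            self._took_end_action = True
            self._success = self._is_goal_in_range()
            self.last_action_success = self._success
        else:
            # A real task would forward action_str to its environment here and
            # read back env.last_action_success.
            self.last_action_success = True
        return RLStepResult(
            observation={},  # a real task would return self.get_observations()
            reward=1.0 if self._success else -0.01,  # arbitrary illustrative values
            done=self.is_done(),
            info={"last_action_success": self.last_action_success, "action": action},
        )


print(ToyNavTask(max_steps=3)._step(0))

Running the sketch prints one RLStepResult; done stays False until the END action is taken or max_steps is reached.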