Exemplos de Direction.move_in_direction_number em Python, exemplos de deep_rlsp.envs.gridworlds.env.Direction.move_in_direction_number em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: apples.py Projeto: HumanCompatibleAI/deep-rlsp

    def get_next_states(self, state, action):
        """Enumerate the possible successor states of `state` under `action`.

        The action is coerced to ``int`` and handled as follows:

        * the STAY action number leaves the agent where it is;
        * any other action below ``len(Direction.ALL_DIRECTIONS)`` makes the
          agent face that direction and, when the target cell is inside the
          grid and listed in ``self.possible_agent_locations``, move there;
        * action ``5`` interacts with the cell the agent is facing: a carried
          apple is always dropped (and counted in a bucket if one is there,
          capped at ``self.bucket_capacity``); otherwise an apple is picked
          from a faced tree that currently has one;
        * anything else raises ``ValueError``.

        Returns:
            A list of ``(prob, ApplesState, 0)`` tuples, one for each
            ``(prob, tree_apples)`` pair produced by ``self.regen_apples``.
        """
        action = int(action)
        facing, x, y = state.agent_pos
        next_facing, next_x, next_y = facing, x, y
        trees_after = deepcopy(state.tree_states)
        buckets_after = deepcopy(state.bucket_states)
        holding_apple = state.carrying_apple

        if action == Direction.get_number_from_direction(Direction.STAY):
            # Staying put changes nothing about the agent.
            pass
        elif action < len(Direction.ALL_DIRECTIONS):
            # A movement action always turns the agent, even if the step
            # itself turns out to be illegal.
            next_facing = action
            cand_x, cand_y = Direction.move_in_direction_number((x, y), action)
            step_is_legal = (
                0 <= cand_x < self.width
                and 0 <= cand_y < self.height
                and (cand_x, cand_y) in self.possible_agent_locations
            )
            if step_is_legal:
                next_x, next_y = cand_x, cand_y
        elif action == 5:
            # Interact with whatever lies in front of the agent.
            faced_cell = Direction.move_in_direction_number((x, y), facing)
            if state.carrying_apple:
                # The apple is released no matter what is in front.
                holding_apple = False
                if faced_cell in buckets_after:
                    apples_in_bucket = buckets_after[faced_cell]
                    buckets_after[faced_cell] = min(apples_in_bucket + 1,
                                                    self.bucket_capacity)
            elif faced_cell in trees_after and trees_after[faced_cell]:
                # Empty-handed and facing a tree that has an apple: pick it.
                holding_apple = True
                trees_after[faced_cell] = False
        else:
            raise ValueError("Invalid action {}".format(action))

        agent_after = (next_facing, next_x, next_y)

        # Regeneration is driven by the apple flags of the *original* state
        # so that an apple picked this step is not immediately regrown.
        apples_before = [state.tree_states[loc] for loc in self.tree_locations]
        apples_after = [trees_after[loc] for loc in self.tree_locations]

        successors = []
        for prob, tree_apples in self.regen_apples(apples_before,
                                                   apples_after):
            trees = dict(zip(self.tree_locations, tree_apples))
            successor = ApplesState(agent_after, trees, buckets_after,
                                    holding_apple)
            successors.append((prob, successor, 0))
        return successors

Exemplo n.º 2

0

Exibir arquivo

    def get_next_state(self, state, action):
        """Compute the successor of `state` after the agent takes `action`.

        The agent moves one step in the direction numbered by `action`,
        staying in place if that would leave the grid. Arriving on the
        train's current position while carrying a battery sets the train
        life to 10 and consumes the battery. A train with positive life then
        advances along ``self.train_transition`` and loses one unit of life.
        Finally, an agent that was not carrying a battery at the start of the
        step picks up a battery present on the cell it arrived at.

        Returns:
            A new ``BatteriesState``; `state` itself is not modified.
        """
        action = int(action)
        cand_x, cand_y = Direction.move_in_direction_number(
            state.agent_pos, action)
        inside_grid = 0 <= cand_x < self.width and 0 <= cand_y < self.height
        if not inside_grid:
            # Out-of-bounds moves leave the agent where it was.
            cand_x, cand_y = state.agent_pos
        agent_pos = (cand_x, cand_y)

        train_pos = state.train_pos
        train_life = state.train_life
        batteries = deepcopy(state.battery_present)
        carrying = state.carrying_battery

        # Deliver the battery to the train.
        if agent_pos == state.train_pos and state.carrying_battery:
            train_life = 10
            carrying = False

        # The train only moves while it still has life left.
        if train_life > 0:
            train_pos = self.train_transition[state.train_pos]
            train_life -= 1

        # Pick up a battery lying on the destination cell.
        battery_here = (agent_pos in state.battery_present
                        and state.battery_present[agent_pos])
        if battery_here and not state.carrying_battery:
            carrying = True
            batteries[agent_pos] = False

        return BatteriesState(
            agent_pos,
            train_pos,
            train_life,
            batteries,
            carrying,
        )

Exemplo n.º 3

0

Exibir arquivo

Arquivo: room.py Projeto: HumanCompatibleAI/deep-rlsp

 def get_next_state(self, state, action):
     """Compute the successor of `state` after the agent takes `action`.

     The agent moves one step in the direction numbered by `action`, or
     stays in place when the step would leave the grid. Any vase located
     on the cell the agent ends up on is marked ``False`` (broken).

     Returns:
         A new ``RoomState``; the vase mapping of `state` is deep-copied
         rather than mutated.
     """
     action = int(action)
     cand_x, cand_y = Direction.move_in_direction_number(
         state.agent_pos, action)
     inside_grid = 0 <= cand_x < self.width and 0 <= cand_y < self.height
     if not inside_grid:
         # Reject the move and keep the previous coordinates.
         cand_x, cand_y = state.agent_pos
     agent_pos = (cand_x, cand_y)

     vases = deepcopy(state.vase_states)
     if agent_pos in vases:
         # Stepping onto a vase breaks it.
         vases[agent_pos] = False
     return RoomState(agent_pos, vases)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: basic_room.py Projeto: HumanCompatibleAI/deep-rlsp

    def get_next_state(self, state, action):
        """Compute the successor of `state` after the agent takes `action`.

        Here `state` is passed directly to
        ``Direction.move_in_direction_number`` as the position and the
        result is unpacked as an ``(x, y)`` pair. The STAY action and any
        move that would leave the grid return `state` unchanged.

        Raises:
            ValueError: if `action` is not below
                ``len(Direction.ALL_DIRECTIONS)``.
        """
        action = int(action)

        if action == Direction.get_number_from_direction(Direction.STAY):
            return state

        if action >= len(Direction.ALL_DIRECTIONS):
            raise ValueError("Invalid action {}".format(action))

        cand_x, cand_y = Direction.move_in_direction_number(state, action)
        if 0 <= cand_x < self.width and 0 <= cand_y < self.height:
            # The target cell is inside the grid, so the move succeeds.
            return cand_x, cand_y

        # Out-of-bounds moves are ignored.
        return state

Exemplo n.º 5

0

Exibir arquivo

    def get_next_state(self, state, action):
        """Compute the successor of `state` after the agent takes `action`.

        The agent moves one step in the direction numbered by `action`, or
        stays in place when the step would leave the grid. An intact train
        advances along ``self.train_transition``. A vase on the agent's new
        cell is marked ``False`` (broken), and the train is marked as no
        longer intact when the agent ends up on the train's new position.

        Returns:
            A new ``TrainState``; the vase mapping of `state` is deep-copied
            rather than mutated.
        """
        action = int(action)
        cand_x, cand_y = Direction.move_in_direction_number(
            state.agent_pos, action)
        inside_grid = 0 <= cand_x < self.width and 0 <= cand_y < self.height
        if not inside_grid:
            # Reject the move and keep the previous coordinates.
            cand_x, cand_y = state.agent_pos
        agent_pos = (cand_x, cand_y)

        vases = deepcopy(state.vase_states)
        train_pos = state.train_pos
        train_intact = state.train_intact
        if state.train_intact:
            # Only a train that is still intact keeps moving.
            train_pos = self.train_transition[state.train_pos]

        # Collisions are evaluated against the positions after both moves.
        if agent_pos in vases:
            vases[agent_pos] = False
        if agent_pos == train_pos:
            train_intact = False

        return TrainState(agent_pos, vases, train_pos, train_intact)