Example #1
    def _state_step(
        self, action: D.T_agent[D.T_concurrency[D.T_event]]
    ) -> TransitionOutcome[D.T_state, D.T_agent[TransitionValue[D.T_value]],
                           D.T_agent[D.T_info]]:

        # Get players' moves
        move1, move2 = action['player1'], action['player2']

        # Compute rewards
        r1, r2 = {
            (Move.rock, Move.rock): (0, 0),
            (Move.rock, Move.paper): (-1, 1),
            (Move.rock, Move.scissors): (1, -1),
            (Move.paper, Move.rock): (1, -1),
            (Move.paper, Move.paper): (0, 0),
            (Move.paper, Move.scissors): (-1, 1),
            (Move.scissors, Move.rock): (-1, 1),
            (Move.scissors, Move.paper): (1, -1),
            (Move.scissors, Move.scissors): (0, 0)
        }[move1, move2]

        # Compute num_move increment
        last_state = self._memory
        num_move = last_state.num_move + 1

        return TransitionOutcome(state=State(num_move=num_move),
                                 value={
                                     'player1': TransitionValue(reward=r1),
                                     'player2': TransitionValue(reward=r2)
                                 },
                                 termination=(num_move >= self._max_moves))
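This step function relies on a Move enum and a State record defined elsewhere in the domain. A minimal sketch of what they might look like, inferred purely from the usage above:

from enum import Enum
from typing import NamedTuple


class Move(Enum):
    rock = 0
    paper = 1
    scissors = 2


class State(NamedTuple):
    num_move: int  # number of moves played so far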
Example #2
 def _get_transition_value(
         self,
         state: TrafficLightState,
         action: TrafficLightAction,
         next_state: Optional[TrafficLightState] = None) -> TransitionValue:
     '''Returns the value of the transition.'''
     if next_state is not None:
         # Cost is the number of cars still queueing around the transition;
         # the reward is simply the negated cost.
         cars_queueing = next_state.cars_queueing_east + state.cars_queueing_north
         return TransitionValue(cost=cars_queueing,
                                reward=-cars_queueing)
     else:
         return TransitionValue(cost=0)
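This value function presupposes a TrafficLightState carrying per-direction queue lengths and light states. A plausible sketch, reconstructed from the fields used here and in the test of Example #11 (the enum ordering and member values are assumptions):

from dataclasses import dataclass
from enum import Enum


class SingleLightState(Enum):
    GREEN = 0
    RECENT_RED = 1
    RED = 2


@dataclass(frozen=True)
class TrafficLightState:
    cars_queueing_north: int
    cars_queueing_east: int
    north_light: SingleLightState
    east_light: SingleLightState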
Example #3
 def _get_transition_value(
         self,
         state: SMState,
         action: SMAction,
         next_state: Optional[SMState] = None) -> TransitionValue:
     (value, _distrib) = self._state_to_action_to_output[state][action]
     return TransitionValue(cost=value)
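Here the cost is not computed but looked up: _state_to_action_to_output is presumably a nested mapping from state to action to a (cost, successor distribution) pair. A hypothetical sketch of its shape, with placeholder keys:

# Only the nesting and the (cost, distribution) pair structure matter here.
_state_to_action_to_output = {
    's0': {
        'a0': (1.5, {'s1': 0.8, 's2': 0.2}),
    },
}

value, _distrib = _state_to_action_to_output['s0']['a0']  # cost 1.5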
Example #4
 def _get_transition_value(
     self,
     memory: D.T_memory[D.T_state],
     action: D.T_agent[D.T_concurrency[D.T_event]],
     next_state: Optional[D.T_state] = None
 ) -> D.T_agent[TransitionValue[D.T_value]]:
     v = super()._get_transition_value(memory, action, next_state)
     return TransitionValue(reward=v.reward - 1)
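The same reward - 1 shaping reappears in Examples #8 to #10: subtracting a constant from the inherited reward makes every step slightly costly, so solutions that terminate sooner score higher. A hypothetical helper capturing the pattern (penalize_step is not part of the library; TransitionValue is assumed imported as in the snippets above):

def penalize_step(value, penalty=1):
    # Shift the inherited reward down by a constant so each extra step
    # lowers the total return, favouring shorter plans.
    return TransitionValue(reward=value.reward - penalty)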
Example #5
    def _get_transition_value(self, memory: D.T_memory[D.T_state], action: D.T_agent[D.T_concurrency[D.T_event]],
                              next_state: Optional[D.T_state] = None) -> D.T_agent[TransitionValue[D.T_value]]:

        # next_state is assumed to always be provided by the caller,
        # despite the Optional annotation
        if next_state.x == memory.x and next_state.y == memory.y:
            cost = 2  # big penalty when hitting a wall
        else:
            cost = abs(next_state.x - memory.x) + abs(next_state.y - memory.y)  # every move costs 1

        return TransitionValue(cost=cost)
Example #6
 def _get_transition_value(
     self,
     memory: D.T_memory[D.T_state],
     action: D.T_agent[D.T_concurrency[D.T_event]],
     next_state: Optional[D.T_state] = None
 ) -> D.T_agent[TransitionValue[D.T_value]]:
     # every move costs 1
     return TransitionValue(cost=abs(next_state.x - memory.x) +
                            abs(next_state.y - memory.y))
Example #7
 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[TransitionValue[D.T_value]],
                        D.T_agent[D.T_info]]:
     obs, reward, done, info = self._gym_env.step(action)
     return TransitionOutcome(state=obs,
                              value=TransitionValue(reward=reward),
                              termination=done,
                              info=info)
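Note that self._gym_env.step(action) is unpacked as the four-tuple of the classic Gym API. Gymnasium, Gym's successor, splits done into terminated and truncated; if the wrapped environment follows that newer API, a small adapter along these lines (a sketch, not library code) restores the shape used above:

def classic_step(env, action):
    # Gymnasium's step() returns (obs, reward, terminated, truncated, info);
    # collapse the two termination flags back into a single `done`.
    obs, reward, terminated, truncated, info = env.step(action)
    return obs, reward, terminated or truncated, info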
Example #8
 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[TransitionValue[D.T_value]],
                        D.T_agent[D.T_info]]:
     o = super()._state_step(action)
     return TransitionOutcome(state=o.state,
                              value=TransitionValue(reward=o.value.reward -
                                                    1),
                              termination=o.termination,
                              info=o.info)
Example #9
 def _sample(
     self, memory: D.T_memory[D.T_state],
     action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> EnvironmentOutcome[D.T_agent[D.T_observation],
                         D.T_agent[TransitionValue[D.T_value]],
                         D.T_agent[D.T_info]]:
     o = super()._sample(memory, action)
     return EnvironmentOutcome(observation=GymDomainStateProxy(
         state=normalize_and_round(o.observation._state),
         context=o.observation._context),
                               value=TransitionValue(reward=o.value.reward -
                                                     1),
                               termination=o.termination,
                               info=o.info)
Example #10
 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[TransitionValue[D.T_value]],
                        D.T_agent[D.T_info]]:
     o = super()._state_step(action)
     return TransitionOutcome(state=GymDomainStateProxy(
         state=normalize_and_round(o.state._state),
         context=o.state._context),
                              value=TransitionValue(reward=o.value.reward -
                                                    1),
                              termination=o.termination,
                              info=o.info)
Example #11
    def test(self):
        dom = TrafficLightDomain()

        state = TrafficLightState(cars_queueing_north=3,
                                  cars_queueing_east=2,
                                  north_light=SingleLightState.RECENT_RED,
                                  east_light=SingleLightState.RED)
        next_state = TrafficLightState(cars_queueing_north=3,
                                       cars_queueing_east=3,
                                       north_light=SingleLightState.RED,
                                       east_light=SingleLightState.GREEN)
        action = TrafficLightAction.DO_NOT_SWITCH
        self.assertEqual(dom.get_transition_value(state, action, next_state),
                         TransitionValue(cost=6))
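The expected cost follows directly from Example #2's formula: next_state.cars_queueing_east (3) plus state.cars_queueing_north (3) gives 6.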
Example #12
 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[TransitionValue[D.T_value]],
                        D.T_agent[D.T_info]]:
     obs, reward, done, info = self._gym_env.step(action)
     if self._set_state is not None and self._get_state is not None:
         state = GymDomainStateProxy(state=obs,
                                     context=self._initial_env_state)
     else:
         state = GymDomainStateProxy(state=obs, context=self._init_env)
     return TransitionOutcome(state=state,
                              value=TransitionValue(reward=reward),
                              termination=done,
                              info=info)
Example #13
 def _get_next_state(
         self, memory: D.T_memory[D.T_state],
         action: D.T_agent[D.T_concurrency[D.T_event]]) -> D.T_state:
      # _context layout: [env, previous state, last action, last outcome, raw env state (or None)]
      env = memory._context[0]
     if self._set_state is None or self._get_state is None:
         env = deepcopy(env)
     elif memory._context[4] != self._get_state(env):
         self._set_state(env, memory._context[4])
      self._gym_env = env  # in case the simulation environment differs from the planner's environment
     obs, reward, done, info = env.step(action)
     outcome = TransitionOutcome(state=obs,
                                 value=TransitionValue(reward=reward),
                                 termination=done,
                                 info=info)
     # print('Transition:', str(memory._state), ' -> ', str(action), ' -> ', str(outcome.state))
     return GymDomainStateProxy(
         state=outcome.state,
         context=[
             env, memory._state, action, outcome,
             self._get_state(env) if
             (self._get_state is not None
              and self._set_state is not None) else None
         ])
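The context list makes the trade-off explicit: without get/set-state hooks the stored environment must be deep-copied so that stepping it does not corrupt other branches of the search, whereas with the hooks available it suffices to restore the remembered raw state into the shared environment before stepping.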
Example #14
 def decode(value):
     # value[0] carries the magnitude, value[1] flags reward (truthy) vs cost
     if value[1].value:
         return TransitionValue(reward=value[0].value)
     else:
         return TransitionValue(cost=value[0].value)
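decode expects a pair of wrapper objects whose .value attributes carry a magnitude and a reward/cost flag; the wrapper type itself is not shown in this example. A minimal usage sketch with a stand-in:

from collections import namedtuple

Cell = namedtuple('Cell', 'value')  # stand-in for the caller's wrapper type

decode((Cell(10.0), Cell(True)))   # -> TransitionValue(reward=10.0)
decode((Cell(10.0), Cell(False)))  # -> TransitionValue(cost=10.0)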
Example #15
 def _get_transition_value(self, memory: D.T_memory[D.T_state], action: D.T_agent[D.T_concurrency[D.T_event]],
                           next_state: Optional[D.T_state] = None) -> D.T_agent[TransitionValue[D.T_value]]:
     return TransitionValue(cost=1)
Example #16
 def _get_transition_value(self, state: GridState, action: GridAction,
                           next_state: Optional[GridState] = None) -> TransitionValue:
     return TransitionValue(cost=action._cost)