Example #1
    def _state_step(
        self, action: D.T_agent[D.T_concurrency[D.T_event]]
    ) -> TransitionOutcome[D.T_state, D.T_agent[TransitionValue[D.T_value]],
                           D.T_agent[D.T_info]]:

        # Get players' moves
        move1, move2 = action['player1'], action['player2']

        # Compute rewards
        r1, r2 = {
            (Move.rock, Move.rock): (0, 0),
            (Move.rock, Move.paper): (-1, 1),
            (Move.rock, Move.scissors): (1, -1),
            (Move.paper, Move.rock): (1, -1),
            (Move.paper, Move.paper): (0, 0),
            (Move.paper, Move.scissors): (-1, 1),
            (Move.scissors, Move.rock): (-1, 1),
            (Move.scissors, Move.paper): (1, -1),
            (Move.scissors, Move.scissors): (0, 0)
        }[move1, move2]

        # Compute num_move increment
        last_state = self._memory
        num_move = last_state.num_move + 1

        return TransitionOutcome(state=State(num_move=num_move),
                                 value={
                                     'player1': TransitionValue(reward=r1),
                                     'player2': TransitionValue(reward=r2)
                                 },
                                 termination=(num_move >= self._max_moves))
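
This `_state_step` resolves one simultaneous rock-paper-scissors round: both rewards come from a single dict lookup keyed by the joint move, and the episode terminates once `self._max_moves` rounds have been played. The payoff table is the standard zero-sum matrix, which can also be generated instead of spelled out; a self-contained sketch of the same lookup pattern (the `Move` enum mirrors the one the example assumes):

    from enum import Enum

    class Move(Enum):
        rock = 0
        paper = 1
        scissors = 2

    # (move1, move2) -> (reward1, reward2); zero-sum in every cell
    WINS = {(Move.rock, Move.scissors),
            (Move.paper, Move.rock),
            (Move.scissors, Move.paper)}
    PAYOFF = {
        (m1, m2): (0, 0) if m1 == m2 else (1, -1) if (m1, m2) in WINS else (-1, 1)
        for m1 in Move for m2 in Move
    }

    assert PAYOFF[Move.rock, Move.scissors] == (1, -1)  # rock beats scissors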
Example #2
 def _get_next_state(
     self,
     memory: D.T_memory[D.T_state],
     action: D.T_agent[D.T_concurrency[D.T_event]],
 ) -> D.T_state:
     env = memory._context[0]
     if self._set_state is None or self._get_state is None:
         env = deepcopy(env)
     elif memory._context[4] != self._get_state(env):
         self._set_state(env, memory._context[4])
     self._gym_env = env  # keep a handle on the env actually stepped, in case the simulation environment differs from the planner's environment
     obs, reward, done, info = env.step(action)
     outcome = TransitionOutcome(state=obs,
                                 value=Value(reward=reward),
                                 termination=done,
                                 info=info)
     # print('Transition:', str(memory._state), ' -> ', str(action), ' -> ', str(outcome.state))
     return GymDomainStateProxy(
         state=outcome.state,
         context=[
             env,
             memory._state,
             action,
             outcome,
             self._get_state(env) if
             (self._get_state is not None
              and self._set_state is not None) else None,
         ],
     )
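
This `_get_next_state` treats the wrapped Gym environment as a simulator. With no save/restore hooks available (`self._get_state` / `self._set_state` are `None`), it steps a deep copy so that planner rollouts cannot corrupt the live environment; with hooks, it restores the saved internal state (`memory._context[4]`) only when the environment has drifted away from it. The returned proxy packs everything a later expansion needs: the environment, the previous state, the action, the full outcome and, when hooks exist, the freshly saved internal state. The same save/restore-or-copy pattern in isolation (a sketch; `get_state`/`set_state` stand for hypothetical hooks):

    from copy import deepcopy

    def simulate_step(env, action, get_state=None, set_state=None, saved=None):
        if get_state is None or set_state is None:
            env = deepcopy(env)      # no hooks: protect the real environment
        elif saved != get_state(env):
            set_state(env, saved)    # hooks: restore only if the env drifted
        return env, env.step(action)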
Example #3
 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[Value[D.T_value]],
                        D.T_agent[D.T_predicate], D.T_agent[D.T_info]]:
     o = super()._state_step(action)
     self._current_depth += 1
     self._cumulated_reward += o.value.reward
     # self._cumulated_dist_to_start += math.exp(-math.fabs(self._gym_env.sim.get_property_value(prp.position_distance_from_start_mag_mt)))
     self._cumulated_dist_to_start = self._gym_env.sim.get_property_value(
         prp.position_distance_from_start_mag_mt)
     self._cumulated_dist_to_line += math.exp(-math.fabs(
         self._gym_env.sim.get_property_value(prp.shortest_dist)))
     return TransitionOutcome(
         state=GymDomainStateProxy(
             state=o.state._state,
             context=(
                 self._current_depth,
                 self._cumulated_reward,
                 self._cumulated_dist_to_start,
                 self._cumulated_dist_to_line,
             ),
         ),
         value=o.value,
         termination=o.termination,
         info=o.info,
     )
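
Example #3 layers bookkeeping on top of the parent step: a depth counter, the cumulated reward and two distance signals travel in the state proxy's context, so a planner can read them back from the state itself. Note that `_cumulated_dist_to_start` is overwritten each step despite its name (the accumulating variant is left commented out), while `_cumulated_dist_to_line` genuinely accumulates the shaping term `math.exp(-math.fabs(d))`, which maps any distance `d` into `(0, 1]` and thus rewards staying close to the line. A quick check of that term:

    import math

    for d in (0.0, 1.0, 5.0):
        print(d, math.exp(-math.fabs(d)))  # 1.0, ~0.368, ~0.0067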
Example #4
 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[Value[D.T_value]],
                        D.T_agent[D.T_predicate], D.T_agent[D.T_info]]:
     obs, reward, done, info = self._gym_env.step(action)
     return TransitionOutcome(state=obs,
                              value=Value(reward=reward),
                              termination=done,
                              info=info)
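
Example #4 is the thinnest possible adapter: it forwards the action to the wrapped environment (old Gym `step` API returning a 4-tuple) and repackages the result as a `TransitionOutcome`. A rollout against a domain built this way could look like the sketch below (the domain class and the random policy are placeholders, not library code):

    domain = MyGymDomain()  # hypothetical domain using the step above
    domain.reset()
    total, done = 0.0, False
    while not done:
        action = domain.get_action_space().sample()  # random policy
        outcome = domain.step(action)
        total += outcome.value.reward
        done = outcome.termination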
Example #5
 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[TransitionValue[D.T_value]],
                        D.T_agent[D.T_info]]:
     o = super()._state_step(action)
     return TransitionOutcome(state=o.state,
                               value=TransitionValue(reward=o.value.reward - 1),
                              termination=o.termination,
                              info=o.info)
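
Example #5 keeps the parent transition but subtracts 1 from its reward, i.e. a constant per-step penalty. Any planner maximizing cumulated reward is thereby biased toward shorter trajectories; on goal problems whose native reward is zero, this turns reward maximization into plain shortest-path search.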
Example #6
 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[TransitionValue[D.T_value]],
                        D.T_agent[D.T_info]]:
     o = super()._state_step(action)
     return TransitionOutcome(state=GymDomainStateProxy(
         state=normalize_and_round(o.state._state),
         context=o.state._context),
                               value=TransitionValue(reward=o.value.reward - 1),
                              termination=o.termination,
                              info=o.info)
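
Example #6 applies the same per-step penalty but additionally discretizes the observation through `normalize_and_round` before storing it in the proxy, so that planners which hash or compare states see a finite state space. The helper itself is not shown; a plausible minimal version (an assumption, not the library's actual code) could be:

    import numpy as np

    def normalize_and_round(obs, decimals=3):
        # Hypothetical: round each component so that nearby continuous
        # observations collapse onto the same discrete state
        return tuple(np.round(np.asarray(obs, dtype=np.float64), decimals))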
Example #7
 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[Value[D.T_value]],
                        D.T_agent[D.T_predicate], D.T_agent[D.T_info]]:
     self._gym_env.set_state(self._current_state)
     o = super()._state_step(action)
     self._current_state = self._gym_env.get_state()
     return TransitionOutcome(
         state=o.state,
         value=Value(reward=o.value.reward - 1),
         termination=o.termination,
         info=o.info,
     )
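
Example #7 makes the parent step reproducible from the planner's point of view: the environment is forced back to `self._current_state` before stepping and the new internal state is saved right afterwards, so repeated expansions of the same search node always simulate from the same simulator state. The per-step penalty of 1 is the same as in Examples #5 and #6.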
Example #8
 def _state_step(
     self, action: D.T_agent[D.T_concurrency[D.T_event]]
 ) -> TransitionOutcome[D.T_state, D.T_agent[Value[D.T_value]],
                        D.T_agent[D.T_predicate], D.T_agent[D.T_info]]:
     obs, reward, done, info = self._gym_env.step(action)
     if self._set_state is not None and self._get_state is not None:
         state = GymDomainStateProxy(state=obs,
                                     context=self._initial_env_state)
     else:
         state = GymDomainStateProxy(state=obs, context=self._init_env)
     return TransitionOutcome(state=state,
                              value=Value(reward=reward),
                              termination=done,
                              info=info)
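
Example #8 pins the proxy context to the episode's starting point rather than to the evolving state: the saved initial internal state when save/restore hooks exist, or the initial environment object otherwise. That is enough for algorithms that only ever need to re-simulate from the beginning of an episode.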
Example #9
 @staticmethod
 def decode(outcome):
     return TransitionOutcome(
         state=MyShmProxy.StateProxy.decode(outcome[0]),
         value=MyShmProxy.TransitionValueProxy.decode(outcome[1:3]),
         termination=MyShmProxy.BoolProxy.decode(outcome[3]))
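
`decode` rebuilds a `TransitionOutcome` from a flat shared-memory record: slot 0 holds the state, slots 1-2 the transition value (hence the `outcome[1:3]` slice), and slot 3 the termination flag. A matching `encode` would write the same slots in reverse; the sketch below assumes that layout and that the per-field proxies expose `encode` methods symmetric to their `decode` (an assumption, not shown in the source):

    @staticmethod
    def encode(outcome, shm_outcome):
        # Hypothetical inverse of decode, assuming the same 4-slot layout
        MyShmProxy.StateProxy.encode(outcome.state, shm_outcome[0])
        MyShmProxy.TransitionValueProxy.encode(outcome.value, shm_outcome[1:3])
        MyShmProxy.BoolProxy.encode(outcome.termination, shm_outcome[3])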