Example #1
def finite_horizon_MRP(process: FiniteMarkovRewardProcess[S],
                       limit: int) -> FiniteMarkovRewardProcess[WithTime[S]]:
    """Turn a normal FiniteMarkovRewardProcess into one with a finite horizon
    that stops after 'limit' steps.

    Note that this makes the data representation of the process
    larger, since we end up having distinct sets and transitions for
    every single time step up to the limit.

    """
    transition_map: Dict[WithTime[S], Optional[RewardOutcome]] = {}

    # Non-terminal states
    for time in range(0, limit):

        for s in process.states():
            result: Optional[StateReward[S]] = process.transition_reward(s)
            s_time = WithTime(state=s, time=time)

            transition_map[s_time] = (None if result is None else result.map(
                lambda s_r: (WithTime(state=s_r[0], time=time + 1), s_r[1])))

    # Terminal states
    for s in process.states():
        transition_map[WithTime(state=s, time=limit)] = None

    return FiniteMarkovRewardProcess(transition_map)
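A minimal usage sketch follows; the two-state reward process ('up'/'down') and its reward values are illustrative assumptions, not part of the library, and only show how finite_horizon_MRP time-stamps every state up to the limit:

base: FiniteMarkovRewardProcess[str] = FiniteMarkovRewardProcess({
    'up':   Categorical({('up', 1.0): 0.7, ('down', 2.0): 0.3}),
    'down': Categorical({('up', 2.0): 0.3, ('down', 1.0): 0.7})
})
bounded = finite_horizon_MRP(base, limit=10)
# bounded's states are WithTime(state=..., time=t) for t = 0..10;
# the states at time == 10 map to None, i.e. they are terminal.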
Example #2
    def apply_finite_policy(self, policy: FinitePolicy[S, A])\
            -> FiniteMarkovRewardProcess[S]:

        transition_mapping: Dict[S, FiniteDistribution[Tuple[S, float]]] = {}

        for state in self.mapping:
            action_map: ActionMapping[A, S] = self.mapping[state]
            outcomes: DefaultDict[Tuple[S, float], float]\
                = defaultdict(float)
            actions = policy.act(state)
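            # Mix each action's (next state, reward) distribution,
            # weighted by the policy's probability of choosing that action.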
            for action, p_action in actions:
                for (s1, r), p in action_map[action].table().items():
                    outcomes[(s1.state, r)] += p_action * p

            transition_mapping[state.state] = Categorical(outcomes)

        return FiniteMarkovRewardProcess(transition_mapping)
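A hedged usage sketch; flip_flop_mdp and always_true are assumed names for an existing FiniteMarkovDecisionProcess and a FinitePolicy, not objects defined in this example:

# Assumed to exist: flip_flop_mdp (a FiniteMarkovDecisionProcess) and
# always_true (a FinitePolicy that picks one fixed action in every state).
implied_mrp = flip_flop_mdp.apply_finite_policy(always_true)
# implied_mrp is a FiniteMarkovRewardProcess whose transition probabilities
# are the policy-weighted mixtures computed in the nested loops above.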
Example #3
def finite_mrp(
    fixed_experiences: Sequence[TransitionStep[S]]
) -> FiniteMarkovRewardProcess[S]:
    def by_state(tr: TransitionStep[S]) -> S:
        return tr.state.state

    d: Mapping[S, Sequence[Tuple[S, float]]] = \
        {s: [(t.next_state.state, t.reward) for t in l] for s, l in
         itertools.groupby(
             sorted(fixed_experiences, key=by_state),
             key=by_state
         )}
    mrp: Dict[S, Categorical[Tuple[S, float]]] = \
        {s: Categorical({x: y / len(l) for x, y in
                         collections.Counter(l).items()})
         for s, l in d.items()}
    return FiniteMarkovRewardProcess(mrp)
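For context, a sketch of feeding finite_mrp some logged transitions; the three hand-written TransitionStep records and the NonTerminal wrapper mirror the attribute accesses above and are illustrative assumptions:

experiences = [
    TransitionStep(state=NonTerminal('A'), next_state=NonTerminal('B'),
                   reward=1.0),
    TransitionStep(state=NonTerminal('A'), next_state=NonTerminal('B'),
                   reward=1.0),
    TransitionStep(state=NonTerminal('A'), next_state=NonTerminal('A'),
                   reward=0.0),
]
empirical = finite_mrp(experiences)
# From 'A' the empirical MRP moves to ('B', 1.0) with probability 2/3
# and to ('A', 0.0) with probability 1/3, matching the observed frequencies.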
Example #4
    def apply_finite_policy(
            self, policy: FinitePolicy[S, A]) -> FiniteMarkovRewardProcess[S]:

        transition_mapping: Dict[S, Optional[StateReward[S]]] = {}

        for state in self.mapping:
            action_map: Optional[ActionMapping[A, S]] = self.mapping[state]

            if action_map is None:
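                # A None action map marks a terminal state; it stays
                # terminal (None) in the resulting reward process.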
                transition_mapping[state] = None
            else:
                outcomes: DefaultDict[Tuple[S, float],
                                      float] = defaultdict(float)

                actions = policy.act(state)
                if actions is not None:
                    for action, p_action in actions:
                        for outcome, p_state_reward in action_map[action]:
                            outcomes[outcome] += p_action * p_state_reward

                transition_mapping[state] = Categorical(outcomes)

        return FiniteMarkovRewardProcess(transition_mapping)
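Compared with Example #2, this variant keeps the terminal-state convention explicit: a state whose action map is None is passed through as None and so remains terminal in the resulting FiniteMarkovRewardProcess, and the policy is allowed to leave a state unspecified (policy.act may return None). The states here are used directly as dictionary keys, so no .state unwrapping is needed.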
Example #5
def finite_horizon_MRP(process: FiniteMarkovRewardProcess[S],
                       limit: int) -> FiniteMarkovRewardProcess[WithTime[S]]:
    '''Turn a normal FiniteMarkovRewardProcess into one with a finite horizon
    that stops after 'limit' steps.

    Note that this makes the data representation of the process
    larger, since we end up having distinct sets and transitions for
    every single time step up to the limit.

    '''
    transition_map: Dict[WithTime[S], RewardOutcome] = {}

    # Non-terminal states
    for time in range(limit):

        for s in process.non_terminal_states:
            result: StateReward[S] = process.transition_reward(s)
            s_time = WithTime(state=s.state, time=time)

            transition_map[s_time] = result.map(
                lambda sr: (WithTime(state=sr[0].state, time=time + 1), sr[1]))

    return FiniteMarkovRewardProcess(transition_map)
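Unlike Example #1, this newer variant iterates over process.non_terminal_states and writes no explicit terminal layer: states at time == limit appear only as successors, so (assuming the constructor infers terminal states from states that never appear as keys) they come out terminal automatically. A hedged one-liner, where base is assumed to be any FiniteMarkovRewardProcess:

bounded = finite_horizon_MRP(base, limit=5)
# every remaining non-terminal state carries a time stamp below the limit
assert all(0 <= s.state.time < 5 for s in bounded.non_terminal_states)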
Example #6
    def __init__(self, sl_mapping: SLMapping):
        SnakesLaddersFMP.__init__(self, sl_mapping)
        FiniteMarkovRewardProcess.__init__(
            self, self.get_transition_reward_map()
        )
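This __init__ is shown without its enclosing class. Both base initializers are called explicitly (rather than via super()) because they take different constructor arguments. A hedged sketch of the class header it presumably sits under; the class name SnakesLaddersMRP and the int state type are assumptions:

class SnakesLaddersMRP(SnakesLaddersFMP, FiniteMarkovRewardProcess[int]):
    ...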