Exemplo n.º 1
0
def mdp_rep_to_mrp_rep1(
    mdp_rep: SASf,
    policy_rep: SAf
) -> SSf:
    """Combine a state/action/next-state mapping with a policy, per state.

    For every state in ``mdp_rep``, each action listed for that state in
    ``policy_rep`` contributes a copy of ``mdp_rep[state][action]`` whose
    values are multiplied by the policy's weight for that action. The
    per-action dictionaries are then merged with ``sum_dicts``.

    Args:
        mdp_rep: Mapping ``state -> action -> next_state -> value``.
        policy_rep: Mapping ``state -> action -> weight``; it must contain
            every state of ``mdp_rep``, and every action it lists for a
            state must be present in ``mdp_rep[state]``.

    Returns:
        A dict keyed by state whose values are whatever ``sum_dicts``
        returns for that state's list of weighted dictionaries.

    Raises:
        KeyError: If a state or action lookup described above fails.
    """
    mrp_rep = {}
    for state, action_map in mdp_rep.items():
        weighted_parts = []
        for action, weight in policy_rep[state].items():
            scaled = {}
            for next_state, value in action_map[action].items():
                scaled[next_state] = weight * value
            weighted_parts.append(scaled)
        # NOTE(review): sum_dicts is defined elsewhere; presumably it adds
        # values key-wise across the dictionaries -- confirm.
        mrp_rep[state] = sum_dicts(weighted_parts)
    return mrp_rep
Exemplo n.º 2
0
def get_state_reward_gen_dict(tr: SASf, rr: SASf) \
        -> Mapping[S, Mapping[A, Callable[[], Tuple[S, float]]]]:
    """Build a nested ``state -> action -> generator`` dictionary.

    The states and actions are taken from ``rr``. For each (state, action)
    pair, ``get_state_reward_gen_func`` is called with ``tr[state][action]``
    and ``rr[state][action]`` and its result is stored under that pair.

    Args:
        tr: Mapping ``state -> action -> next_state -> value``; it must
            contain every (state, action) pair present in ``rr``.
        rr: Mapping with the same nesting; its keys drive the iteration.

    Returns:
        A dict of dicts holding one ``get_state_reward_gen_func`` result
        per (state, action) pair of ``rr``.

    Raises:
        KeyError: If a (state, action) pair of ``rr`` is missing from ``tr``.
    """
    gen_dict = {}
    for state, reward_by_action in rr.items():
        per_action = {}
        for action in reward_by_action:
            # Look up tr inside the loop so a state with no actions never
            # touches tr at all.
            per_action[action] = get_state_reward_gen_func(
                tr[state][action],
                rr[state][action]
            )
        gen_dict[state] = per_action
    return gen_dict
Exemplo n.º 3
0
def flatten_sasf_dict(sasf: SASf) -> FlattenedDict:
    """Flatten a three-level nested mapping into a list of keyed entries.

    Args:
        sasf: Mapping ``s -> a -> s1 -> f``.

    Returns:
        A list of ``((s, a, s1), f)`` tuples, one per innermost entry, in
        the iteration order of the nested mappings. Empty inner mappings
        contribute nothing.
    """
    flattened = []
    for state, by_action in sasf.items():
        for action, by_next_state in by_action.items():
            for next_state, value in by_next_state.items():
                flattened.append(((state, action, next_state), value))
    return flattened