Example #1
def generate_markov_mdp_pair(n_states,
                             n_abs_states,
                             n_actions,
                             sparsity=0,
                             gamma=0.9,
                             equal_block_rewards=True,
                             equal_block_transitions=True):
    # Sometimes numerical precision causes the abstract MDP to appear non-Markov,
    # so we just keep generating until the problem goes away. Usually it's fine.
    while True:
        # generate an MDP and an abstraction function
        mdp_gnd = MDP.generate(n_states=n_states,
                               n_actions=n_actions,
                               sparsity=sparsity,
                               gamma=gamma)
        assert n_abs_states < n_states
        phi = random_phi(n_states, n_abs_states)

        # ground states that share an abstract state vs. those mapped one-to-one
        agg_states = ((phi.sum(axis=0) > 1) @ phi.transpose()).astype(bool)
        other_states = ((phi.sum(axis=0) == 1) @ phi.transpose()).astype(bool)

        random_weights = random_transition_matrix(
            (1, n_states - n_abs_states + 1))

        # adjust T and R to achieve desired properties
        R = np.copy(mdp_gnd.R)
        T = np.copy(mdp_gnd.T)
        for a in range(mdp_gnd.n_actions):
            if equal_block_rewards:
                R[a][agg_states[:, None] * agg_states] = np.mean(
                    mdp_gnd.R[a][agg_states[:, None] * agg_states])
                R[a][other_states[:, None] * agg_states] = np.mean(
                    mdp_gnd.R[a][other_states[:, None] * agg_states])
                R[a][agg_states[:, None] * other_states] = np.mean(
                    mdp_gnd.R[a][agg_states[:, None] * other_states])

            T[a][:, agg_states] = random_weights * np.sum(
                mdp_gnd.T[a][:, agg_states], axis=1, keepdims=True)
            if equal_block_transitions:
                T[a][agg_states] = np.mean(T[a][agg_states, :], axis=0)
                # index with np.ix_ so the assignment writes into T itself
                # (chained boolean indexing would only modify a temporary copy)
                T[a][np.ix_(agg_states, agg_states)] = random_weights * np.sum(
                    T[a][np.ix_(agg_states, agg_states)], axis=1, keepdims=True)
            # T[a][:,other_states] = random_transition_matrix((1,mdp_gnd.n_states-2)) * np.sum(mdp_gnd.T[a][:,other_states],axis=1, keepdims=True)
            assert is_stochastic(T[a])
        mdp_gnd.R = R
        mdp_gnd.T = T

        p0 = random_transition_matrix((1, n_states)).squeeze()
        mdp_abs = AbstractMDP(mdp_gnd, phi, p0=p0)

        # Ensure that the abstraction is Markov by checking inverse models and ratios
        if is_markov(mdp_abs):
            break
    return mdp_gnd, mdp_abs
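A minimal usage sketch, assuming the helpers referenced above (MDP, AbstractMDP, random_phi, is_markov) are importable from the same module; the argument values below are arbitrary:

# Hypothetical call: a 6-state ground MDP whose 3-state abstraction is Markov by construction.
mdp_gnd, mdp_abs = generate_markov_mdp_pair(n_states=6,
                                            n_abs_states=3,
                                            n_actions=2,
                                            gamma=0.95)
assert is_markov(mdp_abs)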
Example #2
def test():
    mdp = MDP.generate(n_states=4, n_actions=2)
    # enumerate all deterministic policies, evaluate each, and rank the value functions
    pi_list = mdp.all_policies()
    v_list = [vi(mdp, pi)[0] for pi in pi_list]
    v_ranks = sorted_order(v_list)

    # in rank order, no value function should be strictly dominated by the one after it
    sorted_v = [v for _, v in sorted(zip(v_ranks, v_list))]
    for v1, v2 in zip(sorted_v[:-1], sorted_v[1:]):
        assert compare_value_fns(v1, v2) != '<'
    # for pi1, v1 in zip(pi_list, v_list):
    #     for pi2, v2 in zip(pi_list, v_list):
    #         print(v1.round(4))
    #         print(compare_value_fns(v1, v2), v2.round(4))
    #         print()

    # the optimal value function from VI should match the best-ranked value function
    v_star, _, pi_star = vi(mdp)
    assert compare_value_fns(v_star, sorted_v[0]) == '='
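A hedged spot check of the comparator on its own; it assumes compare_value_fns returns a comparison symbol such as '<' or '=' (as the asserts above imply) and that vi(mdp, pi) returns the value function of pi as its first element:

# Hypothetical spot check: compare the value functions of two specific policies.
mdp = MDP.generate(n_states=4, n_actions=2)
pi_a, pi_b = mdp.all_policies()[:2]
v_a = vi(mdp, pi_a)[0]
v_b = vi(mdp, pi_b)[0]
print(compare_value_fns(v_a, v_b))  # e.g. '<' or '='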
Example #3
def main():
    mdp = BlockMDP(MDP.generate(n_states=5, n_actions=6), n_obs_per_block=3)
    v, q, pi = vi(mdp)

    # reconstructing V from Q and the greedy policy should reproduce V
    v_alt = np.zeros_like(v)
    for s in range(mdp.n_states):
        v_alt[s] = q[pi[s]][s]
    v_alt = v_alt.squeeze()
    assert np.allclose(v_alt, v)

    # evaluating the optimal policy should also reproduce the optimal values
    v_pi = vi(mdp, pi)[0]
    assert np.allclose(v_pi, v)

    # solving the base MDP and lifting its policy to the block MDP
    # should give the same policy as solving the block MDP directly
    m_phi = mdp.base_mdp
    v_phi, q_phi, pi_phi = vi(m_phi)
    pi_phi_grounded = np.kron(pi_phi,
                              np.ones((1, mdp.n_states // m_phi.n_states)))
    assert np.allclose(pi_phi_grounded, pi)
    print('All tests passed.')
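A sketch of the policy-lifting step in isolation, assuming numpy is imported as np and that BlockMDP exposes n_states just like the base MDP (as main() above relies on); the repetition factor equals n_obs_per_block:

import numpy as np

# Hypothetical isolation of the lifting step from main().
base = MDP.generate(n_states=5, n_actions=6)
block = BlockMDP(base, n_obs_per_block=3)
_, _, pi_base = vi(base)
pi_lifted = np.kron(pi_base, np.ones((1, block.n_states // base.n_states)))
# pi_lifted repeats each base-MDP action across the 3 observations in its block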
Example #4
def generate_non_markov_mdp_pair(n_states,
                                 n_abs_states,
                                 n_actions,
                                 sparsity=0,
                                 gamma=0.9,
                                 fixed_w=False):
    while True:
        mdp_gnd = MDP.generate(n_states=n_states,
                               n_actions=n_actions,
                               sparsity=sparsity,
                               gamma=gamma)
        assert n_abs_states < n_states
        phi = random_phi(n_states, n_abs_states)
        if fixed_w:
            mdp_abs = UniformAbstractMDP(mdp_gnd, phi)
        else:
            mdp_abs = AbstractMDP(mdp_gnd, phi)

        # Ensure the abstraction is non-Markov by checking inverse models and ratios
        if not is_markov(mdp_abs):
            break
    return mdp_gnd, mdp_abs
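A minimal usage sketch mirroring the Markov case above; the argument values are arbitrary, and fixed_w=True selects the UniformAbstractMDP branch:

# Hypothetical call: sample until the random abstraction of a 6-state MDP is non-Markov.
mdp_gnd, mdp_abs = generate_non_markov_mdp_pair(n_states=6,
                                                n_abs_states=3,
                                                n_actions=2,
                                                fixed_w=True)
assert not is_markov(mdp_abs)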