示例#1
0
def generate_vi(mdp, c, lr=0.1):
    """Scatter-plot one value-iteration trajectory with pyplot.

    The starting value comes from ``utils.value_functional`` applied to a
    policy drawn by ``utils.random_policy``. The iterates returned by
    ``utils.solve`` are stacked and components 0 and 1 of each iterate are
    used as the x/y coordinates of the plotted points.

    Args:
        mdp: Problem object; ``S``, ``A``, ``P``, ``r`` and ``discount`` are read.
        c: Colour passed to ``plt.scatter`` for the first iterate.
        lr: Step size forwarded to ``ss.value_iteration``.

    Returns:
        None. The function only issues ``plt.scatter`` calls.
    """
    start_policy = utils.random_policy(mdp.S, mdp.A)
    start_value = utils.value_functional(
        mdp.P, mdp.r, start_policy, mdp.discount)
    update_fn = ss.value_iteration(mdp, lr)
    iterates = utils.solve(update_fn, start_value)

    # Stack the iterates along a new leading axis and keep index 0 of the
    # last axis, leaving one row per iterate.
    path = np.stack(iterates)[:, :, 0]
    n_steps = path.shape[0]
    first, middle, last = path[0], path[1:-1], path[-1]

    # First iterate; the legend label is the number of iterates.
    plt.scatter(first[0], first[1], c=c, s=30, label='{}'.format(n_steps))
    # Interior iterates, coloured by their position along the trajectory.
    plt.scatter(middle[:, 0], middle[:, 1],
                c=range(n_steps - 2), cmap='viridis', s=10)
    # Last iterate.
    plt.scatter(last[0], last[1], c='m', marker='x')
示例#2
0
def VI(init, M, f):
    """Solve by value iteration from ``init`` and map the result through ``f``.

    Args:
        init: Starting policy handed to ``utils.value_functional``.
        M: Problem object; ``P``, ``r`` and ``discount`` are read.
        f: Array whose transpose is multiplied with the solved value
            (presumably a lifting/abstraction matrix; confirm with caller).

    Returns:
        ``np.dot(f.T, V)`` where ``V`` is the last element of the sequence
        returned by ``utils.solve``.
    """
    # Solve: iterate with a fixed step size of 0.01 and keep the last iterate.
    start_value = utils.value_functional(M.P, M.r, init, M.discount)
    update_fn = ss.value_iteration(M, 0.01)
    trajectory = utils.solve(update_fn, start_value)
    final_value = trajectory[-1]

    # Lift: apply the transpose of ``f`` to the solved value.
    return np.dot(f.T, final_value)
示例#3
0
def value_iteration(mdp, pis, lr):
    """Run value iteration once per starting policy and collect trajectories.

    Args:
        mdp: Problem object; ``P``, ``r`` and ``discount`` are read.
        pis: Iterable of starting policies.
        lr: Step size forwarded to ``ss.value_iteration``.

    Returns:
        A list with one entry per policy in ``pis``, each being whatever
        ``utils.solve`` returned for that starting point.
    """
    trajs = []

    for pi in pis:
        # Each run starts from the value computed for its own policy.
        init_V = utils.value_functional(mdp.P, mdp.r, pi, mdp.discount)
        trajs.append(utils.solve(ss.value_iteration(mdp, lr), init_V))

    return trajs
示例#4
0
def value_iteration(mdp, pis, lr=0.01):
    """Run value iteration from each starting policy; report length and result.

    Args:
        mdp: Problem object; ``P``, ``r`` and ``discount`` are read.
        pis: Iterable of starting policies.
        lr: Step size forwarded to ``ss.value_iteration``. Defaults to
            ``0.01``, the value that was previously hard-coded.

    Returns:
        A tuple ``(lens, finals)`` of two lists aligned with ``pis``:
        ``lens[i]`` is the length of the sequence returned by
        ``utils.solve`` for the i-th policy and ``finals[i]`` is its last
        element.
    """
    lens, finals = [], []

    for pi in pis:
        init_V = utils.value_functional(mdp.P, mdp.r, pi, mdp.discount)
        traj = utils.solve(ss.value_iteration(mdp, lr), init_V)

        # The last iterate is kept as the result of this run.
        finals.append(traj[-1])
        lens.append(len(traj))

    return lens, finals
示例#5
0
def mom_value_iteration(mdp, pis, lr=0.01, momentum=0.9):
    """Run momentum-wrapped value iteration from each starting policy.

    The update from ``ss.value_iteration`` is wrapped with
    ``ss.momentum_bundler`` and the solver state is a pair whose first
    element is the value and whose second element starts as zeros of the
    same shape.

    Args:
        mdp: Problem object; ``P``, ``r`` and ``discount`` are read.
        pis: Iterable of starting policies.
        lr: Step size forwarded to ``ss.value_iteration``. Defaults to
            ``0.01``, the value that was previously hard-coded.
        momentum: Second argument to ``ss.momentum_bundler``. Defaults to
            ``0.9``, the value that was previously hard-coded.

    Returns:
        A tuple ``(lens, finals)`` of two lists aligned with ``pis``:
        ``lens[i]`` is the length of the sequence returned by
        ``utils.solve`` and ``finals[i]`` is the first component of its
        last element.
    """
    lens, finals = [], []

    for pi in pis:
        init_V = utils.value_functional(mdp.P, mdp.r, pi, mdp.discount)
        init_state = (init_V, np.zeros_like(init_V))
        traj = utils.solve(
            ss.momentum_bundler(ss.value_iteration(mdp, lr), momentum),
            init_state)

        # Each state is a pair; only the first component is reported.
        final_value, _ = traj[-1]

        finals.append(final_value)
        lens.append(len(traj))

    return lens, finals