Example #1
import numpy as np
import matplotlib.pyplot as plt

# PuddleWorld, PuddleReward, PuddleRewardLFA, PWTransition, PuddleWorldMDP and
# PolicyIteration are assumed to be provided by the MDP library this example
# targets; import them from its domain and planner modules.


def main():

    with PuddleWorld(start=(0.5, 0.1), resolution=0.05) as world:
        # R = PuddleReward(rmax=1.0, step_reward=0.1)
        R = PuddleRewardLFA(weights=[1, -1], rmax=1.0)  # linear-feature reward
        T = PWTransition()  # puddle-world transition model
        mdp = PuddleWorldMDP(reward=R, transition=T, discount=0.98)

        # Solve the MDP with policy iteration.
        mdp_planner = PolicyIteration()
        res = mdp_planner.solve(mdp)
        V = res['V']
        print(V)
        print(res['pi'])

    fig = plt.figure(figsize=(8, 8))
    ax = fig.gca()
    ax = world.visualize(ax, policy=res['pi'])
    # plt.savefig('world.svg')

    plt.figure(figsize=(8, 8))
    plt.imshow(V.reshape(world.shape).T,  # interpolation='nearest',
               cmap='viridis', origin='lower',
               vmin=np.min(V), vmax=np.max(V))
    plt.grid(False)
    plt.title('Value function')
    plt.colorbar(orientation='horizontal')
    # plt.savefig('world_value.svg')

    plt.show()


if __name__ == '__main__':
    main()
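The example above reads 'V' and 'pi' out of the result dict returned by PolicyIteration().solve(). As a rough reference for what such a planner computes, here is a minimal tabular policy-iteration sketch; the policy_iteration name, the (A, S, S) transition tensor and the state-based reward vector are illustrative assumptions, not the library's actual interface.

import numpy as np


def policy_iteration(P, R, discount):
    """Minimal tabular policy iteration (illustrative sketch).

    P: transition tensor of shape (A, S, S), P[a, s, s'] = Pr(s' | s, a).
    R: state reward vector of shape (S,).
    Returns a dict with 'V' and 'pi', mirroring the result dict used above.
    """
    n_actions, n_states, _ = P.shape
    pi = np.zeros(n_states, dtype=int)      # start from an arbitrary policy
    while True:
        # Policy evaluation: solve (I - discount * P_pi) V = R exactly.
        P_pi = P[pi, np.arange(n_states), :]
        V = np.linalg.solve(np.eye(n_states) - discount * P_pi, R)
        # Policy improvement: act greedily with respect to V.
        Q = R[None, :] + discount * (P @ V)  # Q has shape (A, S)
        new_pi = Q.argmax(axis=0)
        if np.array_equal(new_pi, pi):       # policy is stable -> done
            return {'V': V, 'pi': pi}
        pi = new_pi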
Example #2
import matplotlib.pyplot as plt

# ChainWorld, ChainReward, ChainTransition, ChainMDP and PolicyIteration are
# assumed to come from the same MDP library as Example #1.


def main():
    NUM_STATES = 10

    with ChainWorld(num_states=NUM_STATES) as world:
        R = ChainReward()
        T = ChainTransition()
        mdp = ChainMDP(R, T, discount=0.98)

        # Solve the chain MDP with policy iteration.
        planner = PolicyIteration()
        plan = planner.solve(mdp)

        print(plan['pi'])

    fig = plt.figure(figsize=(12, 3))
    ax = fig.gca()
    ax = world.visualize(ax)
    ax = world.show_policy(ax, policy=plan['pi'])

    plt.figure(figsize=(8, 8))
    plt.plot(plan['V'])
    plt.title('Value function')

    plt.show()


if __name__ == '__main__':
    main()
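For a rough sense of the structure ChainWorld encodes, here is a hypothetical hand-rolled chain MDP run through the policy_iteration sketch shown after Example #1: NUM_STATES states in a row, left/right actions, and reward only in the rightmost state. The library's ChainWorld, ChainReward and ChainTransition may differ in their exact dynamics.

import numpy as np

NUM_STATES = 10

R = np.zeros(NUM_STATES)
R[-1] = 1.0                                   # reward only in the rightmost state

# P[a, s, s']: action 0 steps left, action 1 steps right, clipped at the ends.
P = np.zeros((2, NUM_STATES, NUM_STATES))
for s in range(NUM_STATES):
    P[0, s, max(s - 1, 0)] = 1.0
    P[1, s, min(s + 1, NUM_STATES - 1)] = 1.0

plan = policy_iteration(P, R, discount=0.98)
print(plan['pi'])                             # greedy policy: "right" in every state
print(plan['V'])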