示例#1
0
def graph_PI():
    n_states = 10
    n_actions = 2

    det_pis = utils.get_deterministic_policies(n_states, n_actions)
    print('n pis: {}'.format(len(det_pis)))
    mdp = utils.build_random_sparse_mdp(n_states, n_actions, 0.5)

    A = graph.mdp_topology(det_pis)
    G = nx.from_numpy_array(A)
    pos = nx.spring_layout(G, iterations=200)

    basis = graph.construct_mdp_basis(det_pis, mdp)

    init_pi = utils.softmax(np.random.standard_normal((n_states, n_actions)))
    init_v = utils.value_functional(mdp.P, mdp.r, init_pi, mdp.discount).squeeze()
    a = graph.sparse_coeffs(basis, init_v, lr=0.1)

    pis = utils.solve(search_spaces.policy_iteration(mdp), init_pi)
    print("\n{} policies to vis".format(len(pis)))

    for i, pi in enumerate(pis[:-1]):
        print('Iteration: {}'.format(i))
        v = utils.value_functional(mdp.P, mdp.r, pi, mdp.discount).squeeze()
        a = graph.sparse_coeffs(basis, v, lr=0.1, a_init=a)
        plt.figure(figsize=(16,16))
        nx.draw(G, pos, node_color=a, node_size=150)
        # plt.show()
        plt.savefig('figs/pi_graphs/{}.png'.format(i))
        plt.close()
示例#2
0
def graph_PG():
    # ffmpeg -framerate 10 -start_number 0 -i %d.png -c:v libx264 -r 30 -pix_fmt yuv420p out.mp4
    n_states = 6
    n_actions = 4

    det_pis = utils.get_deterministic_policies(n_states, n_actions)
    print('n pis: {}'.format(len(det_pis)))
    mdp = utils.build_random_mdp(n_states, n_actions, 0.9)

    A = graph.mdp_topology(det_pis)
    G = nx.from_numpy_array(A)
    pos = nx.spring_layout(G, iterations=200)

    basis = graph.construct_mdp_basis(det_pis, mdp)

    init_logits = np.random.standard_normal((n_states, n_actions))
    init_v = utils.value_functional(mdp.P, mdp.r, utils.softmax(init_logits), mdp.discount).squeeze()
    a = graph.sparse_coeffs(basis, init_v, lr=0.1)

    print('\nSolving PG')
    pis = utils.solve(search_spaces.policy_gradient_iteration_logits(mdp, 0.1), init_logits)
    print("\n{} policies to vis".format(len(pis)))
    n = len(pis)
    # pis = pis[::n//100]
    pis = pis[0:20]

    for i, pi in enumerate(pis[:-1]):
        print('Iteration: {}'.format(i))
        v = utils.value_functional(mdp.P, mdp.r, pi, mdp.discount).squeeze()
        a = graph.sparse_coeffs(basis, v, lr=0.1, a_init=a)
        plt.figure()
        nx.draw(G, pos, node_color=a)
        # plt.show()
        plt.savefig('figs/pg_graphs/{}.png'.format(i))
        plt.close()
示例#3
0
文件: graph_tests.py 项目: act65/mdps
def test_estimation():
    n_states = 5
    n_actions = 2

    det_pis = utils.get_deterministic_policies(mdp.S, mdp.A)
    mdp = utils.build_random_mdp(n_states, n_actions, 0.9)
    basis = graph.construct_mdp_basis(det_pis, mdp)

    v = np.random.random((n_states, ))
    a = graph.estimate_coeffs(basis.T, v)
    print(a)
示例#4
0
文件: graph_tests.py 项目: act65/mdps
def test_sparse_estimation():
    n_states = 5
    n_actions = 2

    mdp = utils.build_random_mdp(n_states, n_actions, 0.9)
    det_pis = utils.get_deterministic_policies(mdp.S, mdp.A)
    basis = graph.construct_mdp_basis(det_pis, mdp)

    v = utils.value_functional(mdp.P, mdp.r, det_pis[2],
                               mdp.discount).squeeze()
    a = graph.sparse_coeffs(basis, v)

    print(a)
示例#5
0
文件: graph_tests.py 项目: act65/mdps
def test_everything():
    n_states = 5
    n_actions = 2

    det_pis = utils.get_deterministic_policies(n_states, n_actions)
    mdp = utils.build_random_mdp(n_states, n_actions, 0.9)

    A = graph.mdp_topology(det_pis)
    basis = graph.construct_mdp_basis(det_pis, mdp)

    # v = np.random.random((n_states, ))
    v = utils.value_functional(mdp.P, mdp.r, det_pis[2],
                               mdp.discount).squeeze()
    a = graph.sparse_coeffs(basis, v)

    G = nx.from_numpy_array(A)
    pos = nx.spring_layout(G, iterations=200)
    nx.draw(G, pos, node_color=a)
    plt.show()