示例#1
0
    def test_unexpanded_states_2(self):
        # After expanding states '1' and '2' in sequence, no unexpanded
        # states should be reported for the `bpsg_2` graph.
        mdp_g = mdp_graph.init_graph(graph)

        # Each expansion is fed the graph returned by the previous one,
        # starting from the `bpsg` fixture.
        current_graph = bpsg
        for state in ('1', '2'):
            current_graph, mdp_g = mdp_graph.expand_state(
                state, mdp_g, current_graph)

        # NOTE(review): the query uses `bpsg_2` (defined outside this method),
        # not the graph produced by the expansions above -- confirm intended.
        remaining = mdp_graph.get_unexpanded_states(mdp_g, bpsg_2)
        self.assertListEqual(remaining, [])
示例#2
0
 def visit(s):
     # Visit state `s`: expand it when it is neither a goal nor already
     # expanded, then run a Bellman backup restricted to `s`.
     # All working state is owned by the enclosing function and rebound there.
     nonlocal explicit_graph, bpsg, mdp, V, pi, V_i, A
     # Goal states and already-expanded states are skipped by this guard.
     if not mdp[s]['goal'] and not mdp[s]['expanded']:
         explicit_graph, mdp = mg.expand_state(
             s, mdp, explicit_graph)
     # Run the Bellman backup on the single-state list [s]; `gamma` is not
     # declared nonlocal, so it is only read from an outer scope.
     V, pi = mg.bellman(V, V_i, pi, A, [s], mdp, gamma=gamma)
示例#3
0
    def test_expand_state_goal(self):
        # Expanding state '3' must raise a ValueError whose message names the
        # state (presumably '3' is a goal state in the `graph` fixture).
        goal_state = '3'
        mdp_g = mdp_graph.init_graph(graph)
        expected_message = (
            "State %d can't be expanded because it is a goal state" %
            int(goal_state))

        with pytest.raises(ValueError, match=expected_message):
            _, mdp_g = mdp_graph.expand_state(goal_state, mdp_g, {})
示例#4
0
    def test_expand_state(self):
        # Expanding the initial state should flag it as expanded in the MDP
        # graph and place each of its adjacent states in the returned
        # explicit graph.
        start = '1'
        seed_graph = mdp_graph.add_state_graph(start, {})
        mdp_g = mdp_graph.init_graph(graph)

        # Kept lazy (generator): the names are only read in the loop below,
        # after the expansion has run.
        neighbour_names = (adj["name"] for adj in mdp_g[start]['Adj'])

        expanded_graph, mdp_g = mdp_graph.expand_state(
            start, mdp_g, seed_graph)

        assert mdp_g[start]['expanded']

        for name in neighbour_names:
            assert name in expanded_graph
示例#5
0
def lao(s0, heuristic, V_i, pi, S, A, mdp, epsilon=1e-3, gamma=1):
    """Alternate state expansion and value updates until convergence.

    Starting from a partial solution graph that contains only ``s0``, the
    function repeatedly:

    1. expands the first unexpanded state reported for the current partial
       solution graph, runs value iteration over that state and its
       ancestors, and rebuilds the partial solution graph;
    2. once nothing is left to expand, runs ``convergence_test`` over the
       non-goal states of the partial solution graph.

    The loop ends when ``convergence_test`` reports convergence; otherwise
    the partial solution graph is rebuilt and expansion resumes.

    Args:
        s0: Key of the initial state.
        heuristic: Initial value estimates; used directly as the starting
            ``V`` (not copied here).
        V_i: Passed through to ``mg.value_iteration``,
            ``mg.update_partial_solution`` and ``convergence_test``.
        pi: Initial policy, updated by the value-update helpers.
        S: Passed through to ``mg.update_partial_solution``.
        A: Passed through to the value-update helpers.
        mdp: Mapping indexed by state; each entry is read for its ``'goal'``
            flag and the mapping is replaced by what ``mg.expand_state``
            returns.
        epsilon: Tolerance forwarded to the value-update helpers.
        gamma: Discount factor forwarded to the value-update helpers.

    Returns:
        The tuple ``(V, pi)`` from the last convergence test.
    """
    # The partial solution graph starts with only the initial state; the
    # explicit graph starts as an independent copy of it.
    bpsg = {s0: {"Adj": []}}
    explicit_graph = deepcopy(bpsg)

    unexpanded = mg.get_unexpanded_states(mdp, bpsg)
    V = heuristic
    while True:
        while len(unexpanded) > 0:
            s = unexpanded[0]
            explicit_graph, mdp = mg.expand_state(s, mdp, explicit_graph)
            # Update values only for the expanded state and its ancestors in
            # the current partial solution graph.
            Z = mg.find_ancestors(s, bpsg) + [s]
            V, pi = mg.value_iteration(V,
                                       V_i,
                                       pi,
                                       A,
                                       Z,
                                       mdp,
                                       epsilon=epsilon,
                                       gamma=gamma)
            # The policy may have changed, so rebuild the partial solution
            # graph and recompute which of its states still need expanding.
            bpsg = mg.update_partial_solution(pi, V_i, s0, S, bpsg, mdp)
            unexpanded = mg.get_unexpanded_states(mdp, bpsg)

        # Goal states are excluded from the convergence test.
        bpsg_states = [s_ for s_ in bpsg if not mdp[s_]['goal']]
        V, pi, converged = convergence_test(V,
                                            V_i,
                                            pi,
                                            A,
                                            bpsg_states,
                                            mdp,
                                            epsilon=epsilon,
                                            gamma=gamma)

        if converged:
            break

        # Not converged: the updated policy may reach new unexpanded states.
        bpsg = mg.update_partial_solution(pi, V_i, s0, S, bpsg, mdp)
        unexpanded = mg.get_unexpanded_states(mdp, bpsg)
    return V, pi