def test_unexpanded_states_2(self):
    """Expect no unexpanded states for ``bpsg_2`` once '1' and '2' are expanded."""
    model = mdp_graph.init_graph(graph)

    # Expand state '1', then state '2', feeding the graph returned by the
    # first expansion into the second.
    after_first, model = mdp_graph.expand_state('1', model, bpsg)
    # NOTE(review): the graph returned by this second expansion is never read;
    # the check below runs against `bpsg_2` instead — confirm that is intended.
    bpsg_after_second, model = mdp_graph.expand_state('2', model, after_first)

    remaining = mdp_graph.get_unexpanded_states(model, bpsg_2)
    self.assertListEqual(remaining, [])
# visit(s): closure helper. The names it shares with its enclosing function
# are declared ``nonlocal``; ``gamma`` is a free variable resolved from an
# outer scope. It tests mdp[s]['goal'] and mdp[s]['expanded'], calls
# mg.expand_state(s, mdp, explicit_graph) (rebinding explicit_graph and mdp),
# and calls mg.bellman on the single-state list [s] (rebinding V and pi).
# NOTE(review): this definition is flattened onto one line, so which
# statements sit inside the ``if`` cannot be read from here — confirm against
# the formatted source before restructuring.
def visit(s): nonlocal explicit_graph, bpsg, mdp, V, pi, V_i, A if not mdp[s]['goal'] and not mdp[s]['expanded']: explicit_graph, mdp = mg.expand_state( s, mdp, explicit_graph) # run bellman backup V, pi = mg.bellman(V, V_i, pi, A, [s], mdp, gamma=gamma)
def test_expand_state_goal(self):
    """Expect ``expand_state`` to raise ``ValueError`` for state '3'.

    The error message must match the "goal state" text built below.
    """
    goal_state = '3'
    model = mdp_graph.init_graph(graph)
    # The message embeds the state as an integer, hence the int() conversion.
    expected_message = (
        "State %d can't be expanded because it is a goal state"
        % int(goal_state))

    with pytest.raises(ValueError, match=expected_message):
        _, model = mdp_graph.expand_state(goal_state, model, {})
def test_expand_state(self):
    """Expect expanding state '1' to flag it expanded and add its neighbours.

    Every name listed in the state's ``'Adj'`` entries must be a key of the
    explicit graph returned by ``expand_state``.
    """
    start = '1'
    initial_explicit = mdp_graph.add_state_graph(start, {})
    model = mdp_graph.init_graph(graph)

    # Lazy generator: the iterator over the adjacency list is created here,
    # but the names are only read when the loop below consumes it.
    neighbour_names = (adj["name"] for adj in model[start]['Adj'])

    expanded_explicit, model = mdp_graph.expand_state(
        start, model, initial_explicit)

    assert model[start]['expanded']
    for name in neighbour_names:
        assert name in expanded_explicit
def lao(s0, heuristic, V_i, pi, S, A, mdp, epsilon=1e-3, gamma=1):
    """Alternate state expansion and convergence testing, starting from ``s0``.

    Each outer iteration first expands states until the current partial
    solution graph has no unexpanded states, then runs ``convergence_test``
    on the graph's non-goal states. The loop ends as soon as that test
    reports convergence.

    Args:
        s0: Key of the start state; seeds the partial solution graph.
        heuristic: Used directly as the initial ``V``.
        V_i: Forwarded to the ``mg`` helpers and ``convergence_test``.
        pi: Initial policy; rebound from the helpers' return values.
        S: Forwarded to ``mg.update_partial_solution``.
        A: Forwarded to ``mg.value_iteration`` and ``convergence_test``.
        mdp: Mapping indexed by state; each entry is read for its ``'goal'``
            flag. Rebound from the return value of ``mg.expand_state``.
        epsilon: Forwarded to ``mg.value_iteration`` and ``convergence_test``.
        gamma: Forwarded to ``mg.value_iteration`` and ``convergence_test``.

    Returns:
        The tuple ``(V, pi)`` from the iteration in which ``convergence_test``
        reported convergence.
    """
    partial_solution = {s0: {"Adj": []}}
    explicit_graph = deepcopy(partial_solution)
    expansions = 0  # bumped once per expansion; not read or returned here
    V = heuristic
    frontier = mg.get_unexpanded_states(mdp, partial_solution)

    while True:
        # Expansion phase: always take the first unexpanded state, expand it,
        # then re-evaluate it together with its ancestors in the partial
        # solution graph before recomputing the frontier.
        while frontier:
            state = frontier[0]
            explicit_graph, mdp = mg.expand_state(state, mdp, explicit_graph)
            backup_set = mg.find_ancestors(state, partial_solution) + [state]
            V, pi = mg.value_iteration(
                V, V_i, pi, A, backup_set, mdp,
                epsilon=epsilon, gamma=gamma)
            partial_solution = mg.update_partial_solution(
                pi, V_i, s0, S, partial_solution, mdp)
            frontier = mg.get_unexpanded_states(mdp, partial_solution)
            expansions += 1

        # Convergence phase: only non-goal states are handed to the test.
        non_goal_states = [
            name for name in partial_solution if not mdp[name]['goal']]
        V, pi, converged = convergence_test(
            V, V_i, pi, A, non_goal_states, mdp,
            epsilon=epsilon, gamma=gamma)
        if converged:
            return V, pi

        # Not converged: rebuild the partial solution graph and its frontier
        # before the next round of expansion.
        partial_solution = mg.update_partial_solution(
            pi, V_i, s0, S, partial_solution, mdp)
        frontier = mg.get_unexpanded_states(mdp, partial_solution)