def test_network_distribution():
    '''Check propagate_network_distribution on a two-POMDP network.

    The output z1 of the first POMDP drives the input u2 of the second.
    Starting from all mass on (u1, x1, x2) = (0, 0, 0), the propagated
    distribution is compared against a reference of shape (3, 3, 2, 3)
    with weight 0.5 at index (1, 1, 0, 1) and 0.5 at index (2, 2, 1, 2).
    '''
    # First model: one input value, three states, two observations
    upstream = POMDP(
        [np.array([[0, 0.5, 0.5], [0, 1, 0], [0, 0, 1]])],
        [np.array([[1, 0], [1, 0], [0, 1]])],
        input_names=['u1'],
        state_name='x1',
        output_name='z1',
    )

    # Second model: two input values, identity observation matrix
    goto_state_1 = np.array([[0, 1, 0], [0, 1, 0], [0, 0, 1]])
    goto_state_2 = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1]])
    downstream = POMDP(
        [goto_state_1, goto_state_2],
        [np.eye(3)],
        input_names=['u2'],
        state_name='x2',
        output_name='z2',
    )

    network = POMDPNetwork([upstream, downstream])
    network.add_connection(['z1'], 'u2', lambda z1: {z1})

    # distribution over u1 x1 x2
    initial = sparse.COO([[0], [0], [0]], [1], shape=(1, 3, 3))
    propagated = propagate_network_distribution(network, initial)

    expected = sparse.COO(
        [[1, 2], [1, 2], [0, 1], [1, 2]],
        [0.5, 0.5],
        shape=(3, 3, 2, 3),
    )
    np.testing.assert_equal(propagated.todense(), expected.todense())
def plan_mission(prob):
    '''Build the robot/environment network and solve its co-safe LTL problem.

    Reads from `prob`: 'xmin', 'xmax', 'discretization', 'regs', 'formula',
    'cas_T' and 'step_margin'.  Returns a CassiePolicy wrapping the LTL
    policy and the grid abstraction it was computed on.
    '''
    regions = prob['regs']
    grid = Grid(prob['xmin'], prob['xmax'], prob['discretization'],
                name_prefix='c')

    # One belief model per region, built from the second entry of its info
    belief_models = [
        environment_belief_model(region_info[1], region_name)
        for region_name, region_info in regions.items()
    ]

    # Construct rob-env network: the robot state 'c_x' feeds every
    # region's '<name>_u' input
    network = POMDPNetwork([grid.pomdp] + belief_models)
    for region in regions.items():
        input_name = '{}_u'.format(region[0])
        network.add_connection(['c_x'], input_name, get_rob_env_conn(region))

    # solve rob LTL problem
    ltl_policy = solve_ltl_cosafe(
        network,
        prob['formula'],
        get_predicates(regions),
        horizon=prob['cas_T'],
        delta=prob['step_margin'],
        verbose=False,
    )
    return CassiePolicy(ltl_policy, grid)
def plan_exploration(prob, rob_policy):
    '''Plan a UAV exploration policy as a stochastic shortest path problem.

    A PRM roadmap for the UAV is combined with one belief model per region.
    The target set is placed at the roadmap node of prob['uav_xT'], at those
    remaining indices where the robot's value at its initial state
    prob['cas_x0'] is above prob['accept_margin'] or below
    prob['reject_margin'].

    Returns a UAVPolicy built from the SSP policy, the SSP value function
    and the roadmap.
    '''
    abstraction = rob_policy.abstraction
    ltl_policy = rob_policy.ltlpol
    regions = prob['regs']

    # Seed the roadmap with each region's chebXc plus the UAV start and goal
    seeds = [region.chebXc for region, _, _ in regions.values()]
    seeds += [prob['uav_x0'], prob['uav_xT']]
    roadmap = PRM(prob['xmin'], prob['xmax'], num_nodes=40, min_dist=2,
                  max_dist=5, informed_samples=seeds, name_prefix='u')

    belief_models = [
        environment_belief_model(region_info[1], region_name)
        for region_name, region_info in regions.items()
    ]

    # Construct uav-env network: the UAV state 'u_x' feeds every
    # region's '<name>_u' input
    network = POMDPNetwork([roadmap.mdp] + belief_models)
    for region in regions.items():
        input_name = '{}_u'.format(region[0])
        network.add_connection(['u_x'], input_name, get_uav_env_conn(region))

    # solve uav exploration problem
    # Robot value at its initial abstract state and initial automaton state;
    # the remaining axes are presumably the environment beliefs -- confirm
    start_values = ltl_policy.val[0][abstraction.x_to_s(prob['cas_x0']),
                                     ...,
                                     ltl_policy.dfsa_init]
    decided = np.logical_or(start_values > prob['accept_margin'],
                            start_values < prob['reject_margin'])

    target = np.zeros(network.N)
    target[roadmap.x_to_s(prob['uav_xT'])][decided] = 1

    # Append one singleton axis per remaining network dimension so the
    # roadmap costs broadcast over the network state space
    singleton_axes = (1, ) * (len(network.N) - 1)
    costs = roadmap.costs.reshape(roadmap.costs.shape + singleton_axes)

    values, policy = solve_ssp(network, costs, target, M=500, verbose=False)
    return UAVPolicy(policy, values, roadmap)
def test_ssp_valiter2():
    '''Check solve_ssp on a single-action chain with an unreachable target.

    From state 0 the target (state 1) is reached with probability 0.9 per
    step at unit cost, so the expected cost is 1 / 0.9.  State 2 is
    absorbing and never reaches the target, so its value is infinite.
    '''
    T0 = np.array([[0.1, 0.9, 0],
                   [0, 1, 0],
                   [0, 0, 1]])
    network = POMDPNetwork([POMDP([T0])])

    costs = np.ones([1, 3])
    target = np.array([0, 1, 0])

    val, pol = solve_ssp(network, costs, target, M=10)

    # np.inf rather than np.Inf: the capitalised alias was removed in
    # NumPy 2.0 and raises AttributeError there
    np.testing.assert_almost_equal(val, [1 / 0.9, 0, np.inf], decimal=4)
def test_evaluate_Q():
    '''Check evaluate_Q on a two-POMDP network with z1 wired to u2.

    With a value table that is 1 only at index (2, 2), the Q-value for
    input (0, ) at state (0, 0) is expected to be 0.5.
    '''
    trans_x1 = np.array([[0, 0.5, 0.5], [0, 1, 0], [0, 0, 1]])
    obs_x1 = np.array([[1, 0], [1, 0], [0, 1]])
    first = POMDP([trans_x1], [obs_x1],
                  input_names=['u1'], state_name='x1', output_name='z1')

    trans_x2_a = np.array([[0, 1, 0], [0, 1, 0], [0, 0, 1]])
    trans_x2_b = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1]])
    second = POMDP([trans_x2_a, trans_x2_b], [np.eye(3)],
                   input_names=['u2'], state_name='x2', output_name='z2')

    network = POMDPNetwork([first, second])
    network.add_connection(['z1'], 'u2', lambda z1: {z1})

    value_table = np.array([[0, 0, 0],
                            [0, 0, 0],
                            [0, 0, 1]])
    q_value = evaluate_Q(network, (0, ), (0, 0), value_table)
    np.testing.assert_almost_equal(q_value, 0.5)
def test_ssp_valiter1():
    '''Check solve_ssp values on a five-state, two-action model.

    State 4 is the target; it is absorbing under both actions and costs
    nothing.  Every other (action, state) pair costs 1, except the second
    action in states 2 and 3, which costs 50 and 20 respectively.
    '''
    first_action = np.array([[0, 0, 0, 1, 0],
                             [0, 1, 0, 0, 0],
                             [0, 0, 1, 0, 0],
                             [0, 0, 0, 1, 0],
                             [0, 0, 0, 0, 1]])
    second_action = np.array([[0, 1, 0, 0, 0],
                              [0, 0, 0.5, 0, 0.5],
                              [0, 0, 0, 0, 1],
                              [0, 0, 0, 0, 1],
                              [0, 0, 0, 0, 1]])
    network = POMDPNetwork([POMDP([first_action, second_action])])

    # Rows are actions, columns are states
    costs = np.array([[1, 1, 1, 1, 0],
                      [1, 1, 50, 20, 0]], dtype=float)
    target = np.array([0, 0, 0, 0, 1])

    val, pol = solve_ssp(network, costs, target, M=1000)
    np.testing.assert_almost_equal(val, [21, 26, 50, 20, 0])
def test_evolve():
    '''Exercise POMDPNetwork.evolve, including a non-deterministic connection.

    The network is grown step by step: one model, then two independent
    models, then a connection from x1 to u2 that offers both inputs {0, 1}.
    With that connection, x2 starting at 1 must end at 0 or 2, and over
    1000 draws the two outcomes must differ in count by less than 100.
    '''
    step_up = np.array([[0, 1, 0], [0, 0, 1], [0, 0, 1]])
    step_down = np.array([[1, 0, 0], [1, 0, 0], [0, 1, 0]])
    first = POMDP([step_up, step_down], input_names=['u1'], state_name='x1')
    second = POMDP([step_up, step_down], input_names=['u2'], state_name='x2')

    network = POMDPNetwork()

    # Single model
    network.add_pomdp(first)
    next_state, _ = network.evolve([0], (0,))
    np.testing.assert_equal(next_state, [1])

    # Two unconnected models, each with its own input
    network.add_pomdp(second)
    next_state, _ = network.evolve([1, 1], (0, 1))
    np.testing.assert_equal(next_state, [2, 0])

    # u2 is now chosen from {0, 1} regardless of x1, so only u1 is supplied
    network.add_connection(['x1'], 'u2', lambda x1: {0, 1})

    x2_outcomes = []
    for _trial in range(1000):
        next_state, _ = network.evolve([1, 1], (0,))
        np.testing.assert_equal(next_state[0], 2)
        x2_outcomes.append(next_state[1])

    num_zero = x2_outcomes.count(0)
    num_two = x2_outcomes.count(2)

    np.testing.assert_equal(num_zero + num_two, 1000)
    np.testing.assert_array_less(abs(num_zero - num_two), 100)