import random

import pomdp_py


def test_particles():
    random_dist = {}
    total_prob = 0
    for v in range(4):
        random_dist[f"x{v}"] = random.uniform(0, 1)
        total_prob += random_dist[f"x{v}"]
    for v in random_dist:
        random_dist[v] /= total_prob

    particles = pomdp_py.Particles.from_histogram(
        pomdp_py.Histogram(random_dist), num_particles=int(1e6))
    for v in random_dist:
        assert abs(particles[v] - random_dist[v]) <= 2e-3

    counts = {}
    total = int(1e6)
    for i in range(total):
        v = particles.random()
        counts[v] = counts.get(v, 0) + 1
    for v in counts:
        counts[v] /= total
    for v in random_dist:
        assert abs(counts[v] - random_dist[v]) <= 2e-3

    assert particles.mpe() == pomdp_py.Histogram(random_dist).mpe()
def test_weighted_particles():
    random_dist = {}
    total_prob = 0
    for v in range(5):
        random_dist[f"x{v}"] = random.uniform(0, 1)
        total_prob += random_dist[f"x{v}"]
    for v in random_dist:
        random_dist[v] /= total_prob

    particles = pomdp_py.WeightedParticles.from_histogram(
        pomdp_py.Histogram(random_dist))
    assert abs(sum(particles[v] for v, _ in particles) - 1.0) <= 1e-6
    for v in random_dist:
        assert abs(particles[v] - random_dist[v]) <= 2e-3

    counts = {}
    total = int(1e6)
    for i in range(total):
        v = particles.random()
        counts[v] = counts.get(v, 0) + 1
    for v in counts:
        counts[v] /= total
    for v in random_dist:
        assert abs(counts[v] - random_dist[v]) <= 2e-3

    assert particles.mpe() == pomdp_py.Histogram(random_dist).mpe()
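For reference, a minimal usage sketch built only from the calls the two tests above exercise (`from_histogram`, indexing, `random()`, and `mpe()`):

import pomdp_py

# A hand-specified two-value histogram (the values are arbitrary strings).
hist = pomdp_py.Histogram({"x0": 0.3, "x1": 0.7})

# Approximate it with an unweighted and a weighted particle set.
particles = pomdp_py.Particles.from_histogram(hist, num_particles=10000)
wparticles = pomdp_py.WeightedParticles.from_histogram(hist)

print(particles["x1"])     # close to 0.7
print(wparticles.mpe())    # "x1", matching hist.mpe()
print(particles.random())  # a sample drawn according to the particle weights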
def initialize_belief(grid_map, init_robot_position, prior={}):
    """Initialize belief.

    Args:
        grid_map (GridMap): Holds information of the map occupancy
        init_robot_position (tuple): Initial (x,y) position of the robot
        prior (dict): A map from (x,y)->[0,1]. If empty, the belief
            will be uniform."""
    hist = {}  # state -> prob
    total_prob = 0.0
    for target_position in prior:
        state = TagState(init_robot_position, target_position, False)
        hist[state] = prior[target_position]
        total_prob += hist[state]

    for x in range(grid_map.width):
        for y in range(grid_map.length):
            if (x, y) in grid_map.obstacle_poses:
                # Skip obstacles
                continue
            state = TagState(init_robot_position, (x, y), False)
            if len(prior) > 0:
                if (x, y) not in prior:
                    hist[state] = 1e-9
            else:
                hist[state] = 1.0
                total_prob += hist[state]

    # Normalize
    for state in hist:
        hist[state] /= total_prob

    hist_belief = pomdp_py.Histogram(hist)
    return hist_belief
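A hypothetical call with a non-uniform prior (the `grid_map` construction is part of the Tag domain setup and omitted here):

# Hypothetical usage; grid_map is assumed to come from the Tag domain setup.
prior = {(5, 2): 0.7, (5, 3): 0.3}   # (x, y) -> probability of the target being there
belief = initialize_belief(grid_map, init_robot_position=(0, 0), prior=prior)
print(belief.mpe())                  # the most likely TagState under this prior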
def belief_update(agent, real_action, real_observation, next_robot_state, planner):
    """Updates the agent's belief; the belief update may happen
    through planner update (e.g. when planner is POMCP)."""
    # Updates the planner; in case of POMCP, the agent's belief is also updated.
    planner.update(agent, real_action, real_observation)

    # Update agent's belief, when planner is not POMCP
    if not isinstance(planner, pomdp_py.POMCP):
        # Update belief for every object
        for objid in agent.cur_belief.object_beliefs:
            belief_obj = agent.cur_belief.object_belief(objid)
            if isinstance(belief_obj, pomdp_py.Histogram):
                if objid == agent.robot_id:
                    # Assuming the agent can observe its own state:
                    new_belief = pomdp_py.Histogram({next_robot_state: 1.0})
                else:
                    # This is doing
                    #    B(si') = normalizer * O(oi|si',sr',a) * sum_s T(si'|s,a)*B(si)
                    #
                    # Notes: First, objects are static; Second,
                    # O(oi|s',a) ~= O(oi|si',sr',a) according to the definition of
                    # the observation model in models/observation.py. Note that the
                    # exact belief update rule for this OOPOMDP needs a model like
                    # O(oi|si',sr',a) because it is intractable to consider s'
                    # (that would mean iterating over all combinations of all
                    # object states). Of course, there is a workaround (out of
                    # scope): consider a volumetric observation instead of the
                    # object-pose observation, so that oi is a set of pixels (2D)
                    # or voxels (3D). Note that the real observation, oi, is most
                    # likely sampled from O(oi|s',a) because the real world
                    # accounts for occlusion between objects (due to the full
                    # state s'). The problem is how to compute the probability of
                    # this oi given s' and a, when it is intractable to obtain s'.
                    # To this end, we can make the simplifying assumption that an
                    # object is contained within one pixel (or voxel); the pixel
                    # (or voxel) is labeled to indicate free space or object. The
                    # label of each pixel or voxel is certainly a result of
                    # considering the full state s, so occlusion is handled nicely
                    # by this volumetric observation definition. That assumption
                    # reduces the observation model from O(oi|s',a) to
                    # O(label_i|s',a), and it becomes easy to define
                    # O(label_i=i|s',a) and O(label_i=FREE|s',a). These ideas are
                    # used in my recent 3D object search work.
                    new_belief = pomdp_py.update_histogram_belief(
                        belief_obj,
                        real_action,
                        real_observation.for_obj(objid),
                        agent.observation_model[objid],
                        agent.transition_model[objid],
                        # The agent knows the objects are static.
                        static_transition=objid != agent.robot_id,
                        oargs={"next_robot_state": next_robot_state})
            else:
                raise ValueError("Unexpected program state. "
                                 "Are you using the appropriate belief representation?")

            agent.cur_belief.set_object_belief(objid, new_belief)
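The long comment above states the update rule; for a static object the transition sum collapses to the current belief, so the update is just a reweighting of the histogram by the per-object observation likelihood followed by normalization. Below is a minimal hand-rolled sketch of that reduction (operating on a plain `{state: prob}` dict rather than the library's `Histogram` internals, and assuming the per-object observation model accepts `next_robot_state` as a keyword, as the `oargs` above suggests); it is not the actual `pomdp_py.update_histogram_belief` implementation.

def static_object_update(hist, real_action, obs_i, obs_model_i, next_robot_state):
    """Sketch: B(si') = eta * O(oi|si',sr',a) * B(si') for a static object,
    i.e. what the update_histogram_belief call above computes when
    static_transition=True (not the library implementation itself)."""
    new_hist = {}
    total_prob = 0.0
    for si in hist:  # for a static object, the next object state equals the current one
        prob = obs_model_i.probability(obs_i, si, real_action,
                                       next_robot_state=next_robot_state) * hist[si]
        new_hist[si] = prob
        total_prob += prob
    for si in new_hist:  # normalize (eta = 1 / total_prob)
        new_hist[si] /= total_prob
    return pomdp_py.Histogram(new_hist)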
def make_tiger(noise=0.15, init_state="tiger-left", init_belief=[0.5, 0.5]):
    """Convenient function to quickly build a tiger domain.
    Useful for testing."""
    tiger = TigerProblem(noise,
                         TigerState(init_state),
                         pomdp_py.Histogram({
                             TigerState("tiger-left"): init_belief[0],
                             TigerState("tiger-right"): init_belief[1]}))
    return tiger
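A quick usage sketch; the attribute accesses (`tiger.env.state`, `tiger.agent.belief`) mirror those used in `create_case` below:

# Build a noisier instance with a belief biased toward the right door.
tiger = make_tiger(noise=0.3, init_state="tiger-right", init_belief=[0.3, 0.7])
print(tiger.env.state)           # TigerState("tiger-right")
print(tiger.agent.belief.mpe())  # TigerState("tiger-right"), the 0.7-mass state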
def _initialize_histogram_belief(dim, robot_id, object_ids, prior, robot_orientations):
    """
    Returns the belief distribution represented as a histogram
    """
    oo_hists = {}  # objid -> Histogram
    width, length = dim
    for objid in object_ids:
        hist = {}  # pose -> prob
        total_prob = 0
        if objid in prior:
            # prior knowledge provided. Just use the prior knowledge
            for pose in prior[objid]:
                state = ObjectState(objid, "target", pose)
                hist[state] = prior[objid][pose]
                total_prob += hist[state]
        else:
            # no prior knowledge. So uniform.
            for x in range(width):
                for y in range(length):
                    state = ObjectState(objid, "target", (x, y))
                    hist[state] = 1.0
                    total_prob += hist[state]

        # Normalize
        for state in hist:
            hist[state] /= total_prob

        hist_belief = pomdp_py.Histogram(hist)
        oo_hists[objid] = hist_belief

    # For the robot, we assume it can observe its own state;
    # its pose must have been provided in the `prior`.
    assert robot_id in prior, "Missing initial robot pose in prior."
    init_robot_pose = list(prior[robot_id].keys())[0]
    oo_hists[robot_id] =\
        pomdp_py.Histogram({RobotState(robot_id, init_robot_pose, (), None): 1.0})

    return MosOOBelief(robot_id, oo_hists)
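For illustration, a hypothetical prior in the format this function expects (the ids, poses, and robot pose layout are made up; the assert above only requires that the robot's pose appear in the prior):

# Hypothetical prior: robot 0's pose is known; object 5 is believed to be at
# (4, 4) with certainty; object 7 has no entry, so its belief will be uniform.
prior = {
    0: {(2, 3, 0): 1.0},   # robot_id -> {pose: 1.0}; the pose layout is domain-specific
    5: {(4, 4): 1.0},      # objid -> {(x, y): prob}
}
belief = _initialize_histogram_belief((10, 10), robot_id=0, object_ids={5, 7},
                                      prior=prior,
                                      robot_orientations=None)  # unused in the snippet above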
def main():
    init_true_state = random.choice([State("tiger-left"),
                                     State("tiger-right")])
    init_belief = pomdp_py.Histogram({State("tiger-left"): 0.5,
                                      State("tiger-right"): 0.5})
    tiger_problem = TigerProblem(0.15,  # observation noise
                                 init_true_state, init_belief)

    print("** Testing value iteration **")
    vi = pomdp_py.ValueIteration(horizon=3, discount_factor=0.95)
    test_planner(tiger_problem, vi, nsteps=3)

    # Reset agent belief
    tiger_problem.agent.set_belief(init_belief, prior=True)

    print("\n** Testing POUCT **")
    pouct = pomdp_py.POUCT(max_depth=3, discount_factor=0.95,
                           num_sims=4096, exploration_const=200,
                           rollout_policy=tiger_problem.agent.policy_model)
    test_planner(tiger_problem, pouct, nsteps=10)
    pomdp_py.visual.visualize_pouct_search_tree(tiger_problem.agent.tree,
                                                max_depth=5, anonymize=False)

    # Reset agent belief
    tiger_problem.agent.set_belief(init_belief, prior=True)
    tiger_problem.agent.tree = None

    print("** Testing POMCP **")
    tiger_problem.agent.set_belief(pomdp_py.Particles.from_histogram(init_belief,
                                                                     num_particles=100),
                                   prior=True)
    pomcp = pomdp_py.POMCP(max_depth=3, discount_factor=0.95,
                           num_sims=1000, exploration_const=200,
                           rollout_policy=tiger_problem.agent.policy_model)
    test_planner(tiger_problem, pomcp, nsteps=10)
    pomdp_py.visual.visualize_pouct_search_tree(tiger_problem.agent.tree,
                                                max_depth=5, anonymize=False)
def main():
    init_true_state = random.choice([TigerState("tiger-left"),
                                     TigerState("tiger-right")])
    init_belief = pomdp_py.Histogram({TigerState("tiger-left"): 0.5,
                                      TigerState("tiger-right"): 0.5})
    tiger_problem = TigerProblem(0.15,  # observation noise
                                 init_true_state, init_belief)

    print("** Testing value iteration **")
    vi = pomdp_py.ValueIteration(horizon=3, discount_factor=0.95)
    test_planner(tiger_problem, vi, nsteps=3)

    # Reset agent belief
    tiger_problem.agent.set_belief(init_belief, prior=True)

    print("\n** Testing POUCT **")
    pouct = pomdp_py.POUCT(max_depth=3, discount_factor=0.95,
                           num_sims=4096, exploration_const=50,
                           rollout_policy=tiger_problem.agent.policy_model,
                           show_progress=True)
    test_planner(tiger_problem, pouct, nsteps=10)
    TreeDebugger(tiger_problem.agent.tree).pp

    # Reset agent belief
    tiger_problem.agent.set_belief(init_belief, prior=True)
    tiger_problem.agent.tree = None

    print("** Testing POMCP **")
    tiger_problem.agent.set_belief(pomdp_py.Particles.from_histogram(init_belief,
                                                                     num_particles=100),
                                   prior=True)
    pomcp = pomdp_py.POMCP(max_depth=3, discount_factor=0.95,
                           num_sims=1000, exploration_const=50,
                           rollout_policy=tiger_problem.agent.policy_model,
                           show_progress=True, pbar_update_interval=500)
    test_planner(tiger_problem, pomcp, nsteps=10)
    TreeDebugger(tiger_problem.agent.tree).pp
def create(state="tiger-left", belief=0.5, obs_noise=0.15):
    """
    Args:
        state (str): could be 'tiger-left' or 'tiger-right';
            the true state of the environment
        belief (float): Initial belief that the tiger is on the left;
            between 0 and 1.
        obs_noise (float): Noise for the observation model (default 0.15)
    """
    init_true_state = TigerState(state)
    init_belief = pomdp_py.Histogram({TigerState("tiger-left"): belief,
                                      TigerState("tiger-right"): 1.0 - belief})
    tiger_problem = TigerProblem(obs_noise, init_true_state, init_belief)
    tiger_problem.agent.set_belief(init_belief, prior=True)
    return tiger_problem
def create_case(noise=0.15, init_state="tiger-left"):
    """Create a tiger problem instance with uniform belief"""
    from pomdp_problems.tiger.tiger_problem import TigerProblem, State
    tiger = TigerProblem(noise,
                         State(init_state),
                         pomdp_py.Histogram({State("tiger-left"): 0.5,
                                             State("tiger-right"): 0.5}))
    T = tiger.agent.transition_model
    O = tiger.agent.observation_model
    S = T.get_all_states()
    Z = O.get_all_observations()
    A = tiger.agent.policy_model.get_all_actions()
    R = tiger.agent.reward_model
    gamma = 0.95
    b0 = tiger.agent.belief
    s0 = tiger.env.state
    return b0, s0, S, A, Z, T, O, R, gamma
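A hedged usage sketch for consuming these components in an external solver loop, assuming pomdp_py's usual `probability(next_state, state, action)` and `probability(observation, next_state, action)` model interfaces:

# Unpack the tiger POMDP components returned above.
b0, s0, S, A, Z, T, O, R, gamma = create_case(noise=0.15)
print(len(S), len(A), len(Z))   # 2 states, 3 actions, 2 observations
for a in A:
    for sp in S:
        for o in Z:
            # Tabulate T and O, e.g. to hand off to an external solver.
            print(a, sp, o, T.probability(sp, s0, a), O.probability(o, sp, a))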
def main():
    ## Setting 1:
    ## The values are set according to the paper.
    setting1 = {
        "obs_probs": {  # next_state -> action -> observation
            "tiger-left": {
                "open-left": {"tiger-left": 0.5, "tiger-right": 0.5},
                "open-right": {"tiger-left": 0.5, "tiger-right": 0.5},
                "listen": {"tiger-left": 0.85, "tiger-right": 0.15}
            },
            "tiger-right": {
                "open-left": {"tiger-left": 0.5, "tiger-right": 0.5},
                "open-right": {"tiger-left": 0.5, "tiger-right": 0.5},
                "listen": {"tiger-left": 0.15, "tiger-right": 0.85}
            }
        },
        "trans_probs": {  # state -> action -> next_state
            "tiger-left": {
                "open-left": {"tiger-left": 0.5, "tiger-right": 0.5},
                "open-right": {"tiger-left": 0.5, "tiger-right": 0.5},
                "listen": {"tiger-left": 1.0, "tiger-right": 0.0}
            },
            "tiger-right": {
                "open-left": {"tiger-left": 0.5, "tiger-right": 0.5},
                "open-right": {"tiger-left": 0.5, "tiger-right": 0.5},
                "listen": {"tiger-left": 0.0, "tiger-right": 1.0}
            }
        }
    }

    ## Setting 2:
    ## Based on my understanding of T and O; treat the state as given.
    setting2 = {
        "obs_probs": {  # next_state -> action -> observation
            "tiger-left": {
                "open-left": {"tiger-left": 1.0, "tiger-right": 0.0},
                "open-right": {"tiger-left": 1.0, "tiger-right": 0.0},
                "listen": {"tiger-left": 0.85, "tiger-right": 0.15}
            },
            "tiger-right": {
                "open-left": {"tiger-left": 0.0, "tiger-right": 1.0},
                "open-right": {"tiger-left": 0.0, "tiger-right": 1.0},
                "listen": {"tiger-left": 0.15, "tiger-right": 0.85}
            }
        },
        "trans_probs": {  # state -> action -> next_state
            "tiger-left": {
                "open-left": {"tiger-left": 1.0, "tiger-right": 0.0},
                "open-right": {"tiger-left": 1.0, "tiger-right": 0.0},
                "listen": {"tiger-left": 1.0, "tiger-right": 0.0}
            },
            "tiger-right": {
                "open-left": {"tiger-left": 0.0, "tiger-right": 1.0},
                "open-right": {"tiger-left": 0.0, "tiger-right": 1.0},
                "listen": {"tiger-left": 0.0, "tiger-right": 1.0}
            }
        }
    }

    # setting1 resets the problem after the agent chooses open-left or
    # open-right. This is reasonable: when the agent opens a door and sees a
    # tiger, it gets killed; when it sees treasure, it takes some of the
    # treasure and leaves. Either way, the agent has no incentive to close the
    # door and repeat the process. setting2 is the case where the agent is a
    # robot that only cares about accumulating reward.

    setting = build_setting(setting1)
    init_true_state = random.choice(list(TigerProblem.STATES))
    init_belief = pomdp_py.Histogram({State("tiger-left"): 0.5,
                                      State("tiger-right"): 0.5})
    tiger_problem = TigerProblem(setting['obs_probs'],
                                 setting['trans_probs'],
                                 init_true_state, init_belief)

    # Value iteration
    print("** Testing value iteration **")
    vi = pomdp_py.ValueIteration(horizon=2, discount_factor=0.99)
    test_planner(tiger_problem, vi, nsteps=3)

    # Reset agent belief
    tiger_problem.agent.set_belief(init_belief, prior=True)

    print("\n** Testing POUCT **")
    pouct = pomdp_py.POUCT(max_depth=10, discount_factor=0.95,
                           num_sims=4096, exploration_const=110,
                           rollout_policy=tiger_problem.agent.policy_model)
    test_planner(tiger_problem, pouct, nsteps=10)
    pomdp_py.visual.visualize_pouct_search_tree(tiger_problem.agent.tree,
                                                max_depth=5, anonymize=False)

    # Reset agent belief
    tiger_problem.agent.set_belief(init_belief, prior=True)
    tiger_problem.agent.tree = None

    print("** Testing POMCP **")
    tiger_problem.agent.set_belief(pomdp_py.Particles.from_histogram(init_belief,
                                                                     num_particles=100),
                                   prior=True)
    pomcp = pomdp_py.POMCP(max_depth=10, discount_factor=0.95,
                           num_sims=1000, exploration_const=110,
                           rollout_policy=tiger_problem.agent.policy_model)
    test_planner(tiger_problem, pomcp, nsteps=10)
    pomdp_py.visual.visualize_pouct_search_tree(tiger_problem.agent.tree,
                                                max_depth=5, anonymize=False)
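To make the contrast concrete, these two lines (which could be dropped into main() right after the settings are defined) compare the transition row for opening the left door; the expected values are copied from the dicts above:

# setting1: opening a door resets the state to an uninformative 0.5/0.5;
# setting2: the state persists no matter which action is taken.
print(setting1["trans_probs"]["tiger-left"]["open-left"])   # {'tiger-left': 0.5, 'tiger-right': 0.5}
print(setting2["trans_probs"]["tiger-left"]["open-left"])   # {'tiger-left': 1.0, 'tiger-right': 0.0}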
def get_distribution(self, state, action, next_state, **kwargs):
    """Returns the underlying distribution of the model"""
    reward = self._reward_func(state, action)
    return pomdp_py.Histogram({reward: 1.0})
def get_distribution(self, state, action, **kwargs):
    """Returns the underlying distribution of the model"""
    return pomdp_py.Histogram(self._probs[state][action])
def get_distribution(self, next_state, action, **kwargs):
    """Returns the underlying distribution of the model;
    in this case, it's just a histogram"""
    return pomdp_py.Histogram(self._probs[next_state][action])
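All three `get_distribution` variants return a `pomdp_py.Histogram`, so callers can treat them uniformly; a small hypothetical sketch (`obs_model`, `next_state`, and `action` are placeholders, and the argument list follows the last variant above):

# Hypothetical caller of the observation model's get_distribution.
dist = obs_model.get_distribution(next_state, action)
print(dist[dist.mpe()])   # probability of the most likely observation
sample = dist.random()    # draw an observation according to the distribution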