Example 1
def test_particles():
    random_dist = {}
    total_prob = 0
    for v in range(4):
        random_dist[f"x{v}"] = random.uniform(0, 1)
        total_prob += random_dist[f"x{v}"]
    for v in random_dist:
        random_dist[v] /= total_prob

    particles = pomdp_py.Particles.from_histogram(
        pomdp_py.Histogram(random_dist), num_particles=int(1e6))

    for v in random_dist:
        assert abs(particles[v] - random_dist[v]) <= 2e-3

    counts = {}
    total = int(1e6)
    for i in range(total):
        v = particles.random()
        counts[v] = counts.get(v, 0) + 1

    for v in counts:
        counts[v] /= total

    for v in random_dist:
        assert abs(counts[v] - random_dist[v]) <= 2e-3

    assert particles.mpe() == pomdp_py.Histogram(random_dist).mpe()
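
For reference, a minimal sketch of the conversion pattern that this test exercises, using only the pomdp_py calls already shown above (Histogram, Particles.from_histogram, indexing, and mpe); the state names are arbitrary:

import pomdp_py

hist = pomdp_py.Histogram({"x0": 0.2, "x1": 0.5, "x2": 0.3})

# Approximate the histogram with particles; more particles give a tighter
# approximation of each probability.
particles = pomdp_py.Particles.from_histogram(hist, num_particles=100000)

print(particles["x1"])   # roughly 0.5
print(particles.mpe())   # "x1", the same MPE as hist.mpe()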
Example 2
def test_weighted_particles():
    random_dist = {}
    total_prob = 0
    for v in range(5):
        random_dist[f"x{v}"] = random.uniform(0, 1)
        total_prob += random_dist[f"x{v}"]
    for v in random_dist:
        random_dist[v] /= total_prob

    particles = pomdp_py.WeightedParticles.from_histogram(
        pomdp_py.Histogram(random_dist))

    assert abs(sum(particles[v] for v, _ in particles) - 1.0) <= 1e-6

    for v in random_dist:
        assert abs(particles[v] - random_dist[v]) <= 2e-3

    counts = {}
    total = int(1e6)
    for i in range(total):
        v = particles.random()
        counts[v] = counts.get(v, 0) + 1
    for v in counts:
        counts[v] /= total
    for v in random_dist:
        assert abs(counts[v] - random_dist[v]) <= 2e-3

    assert particles.mpe() == pomdp_py.Histogram(random_dist).mpe()
Example 3
def initialize_belief(grid_map, init_robot_position, prior={}):
    """Initialize belief.
    
    Args:
        grid_map (GridMap): Holds information of the map occupancy
        prior (dict): A map from (x,y)->[0,1]. If empty, the belief
            will be uniform."""
    hist = {}  # state -> prob
    total_prob = 0.0
    for target_position in prior:
        state = TagState(init_robot_position, target_position, False)
        hist[state] = prior[target_position]
        total_prob += hist[state]

    for x in range(grid_map.width):
        for y in range(grid_map.length):
            if (x, y) in grid_map.obstacle_poses:
                # Skip obstacles
                continue
            state = TagState(init_robot_position, (x, y), False)
            if len(prior) > 0:
                if (x, y) not in prior:
                    hist[state] = 1e-9
            else:
                hist[state] = 1.0
                total_prob += hist[state]
    # Normalize
    for state in hist:
        hist[state] /= total_prob

    hist_belief = pomdp_py.Histogram(hist)
    return hist_belief
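
A minimal usage sketch, assuming it is run alongside the Tag example that defines TagState; the grid_map stand-in below is hypothetical and only exposes the three attributes the function reads (width, length, obstacle_poses):

from types import SimpleNamespace

# Hypothetical 3x3 map with one obstacle; the real GridMap class provides
# the same attributes.
grid_map = SimpleNamespace(width=3, length=3, obstacle_poses={(1, 1)})

belief = initialize_belief(grid_map, init_robot_position=(0, 0))
# With an empty prior, the 8 non-obstacle cells each get probability 1/8.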
Example 4
def belief_update(agent, real_action, real_observation, next_robot_state,
                  planner):
    """Updates the agent's belief; The belief update may happen
    through planner update (e.g. when planner is POMCP)."""
    # Updates the planner; In case of POMCP, agent's belief is also updated.
    planner.update(agent, real_action, real_observation)

    # Update agent's belief, when planner is not POMCP
    if not isinstance(planner, pomdp_py.POMCP):
        # Update belief for every object
        for objid in agent.cur_belief.object_beliefs:
            belief_obj = agent.cur_belief.object_belief(objid)
            if isinstance(belief_obj, pomdp_py.Histogram):
                if objid == agent.robot_id:
                    # Assuming the agent can observe its own state:
                    new_belief = pomdp_py.Histogram({next_robot_state: 1.0})
                else:
                    # This is doing
                    #    B(si') = normalizer * O(oi|si',sr',a) * sum_si T(si'|si,a) * B(si)
                    #
                    # Notes: First, objects are static; Second,
                    # O(oi|s',a) ~= O(oi|si',sr',a) according to the definition
                    # of the observation model in models/observation.py.  Note
                    # that the exact belief update rule for this OOPOMDP needs to use
                    # a model like O(oi|si',sr',a) because it's intractable to
                    # consider s' (that means all combinations of all object
                    # states must be iterated).  Of course, there could be a
                    # workaround (out of scope) - consider a volumetric observation
                    # instead of the object-pose observation. That means oi is a
                    # set of pixels (2D) or voxels (3D). Note that the real
                    # observation, oi, is most likely sampled from O(oi|s',a)
                    # because the real world accounts for occlusion between objects
                    # (due to full state s'). The problem is how to compute the
                    # probability of this oi given s' and a, where it's
                    # intractable to obtain s'. To this end, we can make a
                    # simplifying assumption that an object is contained within
                    # one pixel (or voxel); The pixel (or voxel) is labeled to
                    # indicate free space or object. The label of each pixel or
                    # voxel is certainly a result of considering the full state
                    # s. The occlusion can be handled nicely with the volumetric
                    # observation definition. Then that assumption can reduce the
                    # observation model from O(oi|s',a) to O(label_i|s',a) and
                    # it becomes easy to define O(label_i=i|s',a) and O(label_i=FREE|s',a).
                    # These ideas are used in my recent 3D object search work.
                    new_belief = pomdp_py.update_histogram_belief(
                        belief_obj,
                        real_action,
                        real_observation.for_obj(objid),
                        agent.observation_model[objid],
                        agent.transition_model[objid],
                        # The agent knows the objects are static.
                        static_transition=objid != agent.robot_id,
                        oargs={"next_robot_state": next_robot_state})
            else:
                raise ValueError("Unexpected program state."\
                                 "Are you using the appropriate belief representation?")

            agent.cur_belief.set_object_belief(objid, new_belief)
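
For a static object, the update described in the long comment above reduces to reweighting the current histogram by the observation likelihood and renormalizing. The sketch below illustrates that reduction only; it is not the library's update_histogram_belief implementation, and forwarding next_robot_state as a keyword to obs_model.probability is an assumption that mirrors the oargs dict passed above:

import pomdp_py

def static_object_belief_update(belief, real_action, real_observation,
                                obs_model, next_robot_state):
    """Reweight a {state: prob} dict by the observation likelihood, renormalize."""
    new_hist = {}
    total_prob = 0.0
    for si in belief:
        # B'(si) = eta * O(oi | si, sr', a) * B(si); the transition term drops
        # out because the object does not move.
        new_hist[si] = obs_model.probability(
            real_observation, si, real_action,
            next_robot_state=next_robot_state) * belief[si]
        total_prob += new_hist[si]
    for si in new_hist:
        new_hist[si] /= total_prob  # eta, the normalizer
    return pomdp_py.Histogram(new_hist)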
Example 5
def make_tiger(noise=0.15, init_state="tiger-left", init_belief=[0.5, 0.5]):
    """Convenient function to quickly build a tiger domain.
    Useful for testing"""
    tiger = TigerProblem(
        noise, TigerState(init_state),
        pomdp_py.Histogram({
            TigerState("tiger-left"): init_belief[0],
            TigerState("tiger-right"): init_belief[1]
        }))
    return tiger
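
A possible usage sketch; the .env and .agent attributes are the same ones accessed by the other examples on this page:

tiger = make_tiger(noise=0.15, init_state="tiger-right")
print(tiger.env.state)         # TigerState("tiger-right"), the true state
print(tiger.agent.cur_belief)  # the 50/50 Histogram passed as init_belief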
Example 6
def _initialize_histogram_belief(dim, robot_id, object_ids, prior,
                                 robot_orientations):
    """
    Returns the belief distribution represented as a histogram
    """
    oo_hists = {}  # objid -> Histogram
    width, length = dim
    for objid in object_ids:
        hist = {}  # pose -> prob
        total_prob = 0
        if objid in prior:
            # prior knowledge provided. Just use the prior knowledge
            for pose in prior[objid]:
                state = ObjectState(objid, "target", pose)
                hist[state] = prior[objid][pose]
                total_prob += hist[state]
        else:
            # no prior knowledge. So uniform.
            for x in range(width):
                for y in range(length):
                    state = ObjectState(objid, "target", (x, y))
                    hist[state] = 1.0
                    total_prob += hist[state]

        # Normalize
        for state in hist:
            hist[state] /= total_prob

        hist_belief = pomdp_py.Histogram(hist)
        oo_hists[objid] = hist_belief

    # For the robot, we assume it can observe its own state;
    # Its pose must have been provided in the `prior`.
    assert robot_id in prior, "Missing initial robot pose in prior."
    init_robot_pose = list(prior[robot_id].keys())[0]
    oo_hists[robot_id] =\
        pomdp_py.Histogram({RobotState(robot_id, init_robot_pose, (), None): 1.0})

    return MosOOBelief(robot_id, oo_hists)
Example 7
def main():
    init_true_state = random.choice(
        [State("tiger-left"), State("tiger-right")])
    init_belief = pomdp_py.Histogram({
        State("tiger-left"): 0.5,
        State("tiger-right"): 0.5
    })
    tiger_problem = TigerProblem(
        0.15,  # observation noise
        init_true_state,
        init_belief)

    print("** Testing value iteration **")
    vi = pomdp_py.ValueIteration(horizon=3, discount_factor=0.95)
    test_planner(tiger_problem, vi, nsteps=3)

    # Reset agent belief
    tiger_problem.agent.set_belief(init_belief, prior=True)

    print("\n** Testing POUCT **")
    pouct = pomdp_py.POUCT(max_depth=3,
                           discount_factor=0.95,
                           num_sims=4096,
                           exploration_const=200,
                           rollout_policy=tiger_problem.agent.policy_model)
    test_planner(tiger_problem, pouct, nsteps=10)

    pomdp_py.visual.visualize_pouct_search_tree(tiger_problem.agent.tree,
                                                max_depth=5,
                                                anonymize=False)

    # Reset agent belief
    tiger_problem.agent.set_belief(init_belief, prior=True)
    tiger_problem.agent.tree = None

    print("** Testing POMCP **")
    tiger_problem.agent.set_belief(pomdp_py.Particles.from_histogram(
        init_belief, num_particles=100),
                                   prior=True)
    pomcp = pomdp_py.POMCP(max_depth=3,
                           discount_factor=0.95,
                           num_sims=1000,
                           exploration_const=200,
                           rollout_policy=tiger_problem.agent.policy_model)
    test_planner(tiger_problem, pomcp, nsteps=10)

    pomdp_py.visual.visualize_pouct_search_tree(tiger_problem.agent.tree,
                                                max_depth=5,
                                                anonymize=False)
Example 8
def main():
    init_true_state = random.choice(
        [TigerState("tiger-left"),
         TigerState("tiger-right")])
    init_belief = pomdp_py.Histogram({
        TigerState("tiger-left"): 0.5,
        TigerState("tiger-right"): 0.5
    })
    tiger_problem = TigerProblem(
        0.15,  # observation noise
        init_true_state,
        init_belief)

    print("** Testing value iteration **")
    vi = pomdp_py.ValueIteration(horizon=3, discount_factor=0.95)
    test_planner(tiger_problem, vi, nsteps=3)

    # Reset agent belief
    tiger_problem.agent.set_belief(init_belief, prior=True)

    print("\n** Testing POUCT **")
    pouct = pomdp_py.POUCT(max_depth=3,
                           discount_factor=0.95,
                           num_sims=4096,
                           exploration_const=50,
                           rollout_policy=tiger_problem.agent.policy_model,
                           show_progress=True)
    test_planner(tiger_problem, pouct, nsteps=10)
    TreeDebugger(tiger_problem.agent.tree).pp

    # Reset agent belief
    tiger_problem.agent.set_belief(init_belief, prior=True)
    tiger_problem.agent.tree = None

    print("** Testing POMCP **")
    tiger_problem.agent.set_belief(pomdp_py.Particles.from_histogram(
        init_belief, num_particles=100),
                                   prior=True)
    pomcp = pomdp_py.POMCP(max_depth=3,
                           discount_factor=0.95,
                           num_sims=1000,
                           exploration_const=50,
                           rollout_policy=tiger_problem.agent.policy_model,
                           show_progress=True,
                           pbar_update_interval=500)
    test_planner(tiger_problem, pomcp, nsteps=10)
    TreeDebugger(tiger_problem.agent.tree).pp
Example 9
 def create(state="tiger-left", belief=0.5, obs_noise=0.15):
     """
     Args:
         state (str): could be 'tiger-left' or 'tiger-right';
                      True state of the environment
         belief (float): Initial belief that the target is
                         on the left; Between 0-1.
         obs_noise (float): Noise for the observation
                            model (default 0.15)
     """
     init_true_state = TigerState(state)
     init_belief = pomdp_py.Histogram({
         TigerState("tiger-left"):
         belief,
         TigerState("tiger-right"):
         1.0 - belief
     })
     tiger_problem = TigerProblem(obs_noise, init_true_state, init_belief)
     tiger_problem.agent.set_belief(init_belief, prior=True)
     return tiger_problem
Example 10
def create_case(noise=0.15, init_state="tiger-left"):
    """Create a tiger problem instance with uniform belief"""
    from pomdp_problems.tiger.tiger_problem import TigerProblem, State
    tiger = TigerProblem(
        noise, State(init_state),
        pomdp_py.Histogram({
            State("tiger-left"): 0.5,
            State("tiger-right"): 0.5
        }))
    T = tiger.agent.transition_model
    O = tiger.agent.observation_model
    S = T.get_all_states()
    Z = O.get_all_observations()
    A = tiger.agent.policy_model.get_all_actions()
    R = tiger.agent.reward_model
    gamma = 0.95

    b0 = tiger.agent.belief
    s0 = tiger.env.state
    return b0, s0, S, A, Z, T, O, R, gamma
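
A sketch of how the returned components might be probed. The .probability calls follow the standard pomdp_py TransitionModel(next_state, state, action) and ObservationModel(observation, next_state, action) argument order; treating these particular models that way is an assumption:

b0, s0, S, A, Z, T, O, R, gamma = create_case()
for a in A:
    for sp in S:
        p_trans = T.probability(sp, s0, a)   # T(s' | s0, a)
        for o in Z:
            p_obs = O.probability(o, sp, a)  # O(o | s', a)
            print(a, sp, o, p_trans, p_obs)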
Example 11
def main():
    ## Setting 1:
    ## The values are set according to the paper.
    setting1 = {
        "obs_probs": {  # next_state -> action -> observation
            "tiger-left":{ 
                "open-left": {"tiger-left": 0.5, "tiger-right": 0.5},
                "open-right": {"tiger-left": 0.5, "tiger-right": 0.5},
                "listen": {"tiger-left": 0.85, "tiger-right": 0.15}
            },
            "tiger-right":{
                "open-left": {"tiger-left": 0.5, "tiger-right": 0.5},
                "open-right": {"tiger-left": 0.5, "tiger-right": 0.5},
                "listen": {"tiger-left": 0.15, "tiger-right": 0.85}
            }
        },
        
        "trans_probs": {  # state -> action -> next_state
            "tiger-left":{ 
                "open-left": {"tiger-left": 0.5, "tiger-right": 0.5},
                "open-right": {"tiger-left": 0.5, "tiger-right": 0.5},
                "listen": {"tiger-left": 1.0, "tiger-right": 0.0}
            },
            "tiger-right":{
                "open-left": {"tiger-left": 0.5, "tiger-right": 0.5},
                "open-right": {"tiger-left": 0.5, "tiger-right": 0.5},
                "listen": {"tiger-left": 0.0, "tiger-right": 1.0}
            }
        }
    }

    ## Setting 2:
    ## Based on my understanding of T and O; Treat the state as given.
    setting2 = {
        "obs_probs": {  # next_state -> action -> observation
            "tiger-left":{ 
                "open-left": {"tiger-left": 1.0, "tiger-right": 0.0},
                "open-right": {"tiger-left": 1.0, "tiger-right": 0.0},
                "listen": {"tiger-left": 0.85, "tiger-right": 0.15}
            },
            "tiger-right":{
                "open-left": {"tiger-left": 0.0, "tiger-right": 1.0},
                "open-right": {"tiger-left": 0.0, "tiger-right": 1.0},
                "listen": {"tiger-left": 0.15, "tiger-right": 0.85}
            }
        },
        
        "trans_probs": {  # state -> action -> next_state
            "tiger-left":{ 
                "open-left": {"tiger-left": 1.0, "tiger-right": 0.0},
                "open-right": {"tiger-left": 1.0, "tiger-right": 0.0},
                "listen": {"tiger-left": 1.0, "tiger-right": 0.0}
            },
            "tiger-right":{
                "open-left": {"tiger-left": 0.0, "tiger-right": 1.0},
                "open-right": {"tiger-left": 0.0, "tiger-right": 1.0},
                "listen": {"tiger-left": 0.0, "tiger-right": 1.0}
            }
        }
    }

    # setting1 resets the problem after the agent chooses open-left or open-right;
    # It's reasonable - when the agent opens one door and sees a tiger, it gets
    # killed; when the agent sees a treasure, it will take some of the treasure
    # and leave. Thus, the agent will have no incentive to close the door and do
    # this again. Setting2 is the case where the agent is a robot that only
    # cares about accumulating reward.
    setting = build_setting(setting1)

    init_true_state = random.choice(list(TigerProblem.STATES))
    init_belief = pomdp_py.Histogram({State("tiger-left"): 0.5,
                                      State("tiger-right"): 0.5})
    tiger_problem = TigerProblem(setting['obs_probs'],
                                 setting['trans_probs'],
                                 init_true_state, init_belief)

    # Value iteration
    print("** Testing value iteration **")
    vi = pomdp_py.ValueIteration(horizon=2, discount_factor=0.99)
    test_planner(tiger_problem, vi, nsteps=3)

    # Reset agent belief
    tiger_problem.agent.set_belief(init_belief, prior=True)

    print("\n** Testing POUCT **")
    pouct = pomdp_py.POUCT(max_depth=10, discount_factor=0.95,
                           num_sims=4096, exploration_const=110,
                           rollout_policy=tiger_problem.agent.policy_model)
    test_planner(tiger_problem, pouct, nsteps=10)

    pomdp_py.visual.visualize_pouct_search_tree(tiger_problem.agent.tree,
                                                max_depth=5, anonymize=False)

    # Reset agent belief    
    tiger_problem.agent.set_belief(init_belief, prior=True)
    tiger_problem.agent.tree = None
    
    print("** Testing POMCP **")
    tiger_problem.agent.set_belief(pomdp_py.Particles.from_histogram(init_belief, num_particles=100), prior=True)
    pomcp = pomdp_py.POMCP(max_depth=10, discount_factor=0.95,
                           num_sims=1000, exploration_const=110,
                           rollout_policy=tiger_problem.agent.policy_model)
    test_planner(tiger_problem, pomcp, nsteps=10)
    
    pomdp_py.visual.visualize_pouct_search_tree(tiger_problem.agent.tree,
                                                max_depth=5, anonymize=False)
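
The nesting of the probability tables above is easy to misread; a self-contained sketch of how a single entry is addressed (values copied from setting1):

obs_probs = {  # next_state -> action -> observation
    "tiger-left": {"listen": {"tiger-left": 0.85, "tiger-right": 0.15}},
}
# O(o="tiger-left" | s'="tiger-left", a="listen")
assert obs_probs["tiger-left"]["listen"]["tiger-left"] == 0.85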
Example 12
def get_distribution(self, state, action, next_state, **kwargs):
    """Returns the underlying distribution of the model"""
    reward = self._reward_func(state, action)
    return pomdp_py.Histogram({reward: 1.0})
Example 13
def get_distribution(self, state, action, **kwargs):
    """Returns the underlying distribution of the model"""
    return pomdp_py.Histogram(self._probs[state][action])
Example 14
def get_distribution(self, next_state, action, **kwargs):
    """Returns the underlying distribution of the model; In this case, it's just a histogram"""
    return pomdp_py.Histogram(self._probs[next_state][action])
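
Whichever model returns it, the Histogram produced by get_distribution can be queried with the same calls used in the tests at the top of this page; a minimal sketch with illustrative probabilities:

import pomdp_py

dist = pomdp_py.Histogram({"tiger-left": 0.85, "tiger-right": 0.15})
print(dist["tiger-left"])  # 0.85, the probability of that outcome
print(dist.mpe())          # "tiger-left", the most probable element
print(dist.random())       # a sample drawn according to the probabilities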