def test_scenario_1_s(self):
        # Smaller version of the corridor collisions scenario above
        # to facilitate DRL training
        scenario_1_mdp = OvercookedGridworld.from_layout_name(
            'scenario1_s', start_order_list=['any'], cook_time=5)
        mlp = MediumLevelPlanner.from_pickle_or_compute(
            scenario_1_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
        a0 = GreedyHumanModel(mlp)
        a1 = CoupledPlanningAgent(mlp)
        agent_pair = AgentPair(a0, a1)
        start_state = OvercookedState(
            [P((2, 1), s, Obj('onion', (2, 1))),
             P((4, 2), s)], {},
            order_list=['onion'])
        env = OvercookedEnv.from_mdp(scenario_1_mdp,
                                     start_state_fn=lambda: start_state)
        trajectory, time_taken_hr, _, _ = env.run_agents(
            agent_pair, include_final_state=True, display=DISPLAY)
        env.reset()

        print("\n" * 5)
        print("-" * 50)

        a0 = CoupledPlanningAgent(mlp)
        a1 = CoupledPlanningAgent(mlp)
        agent_pair = AgentPair(a0, a1)
        trajectory, time_taken_rr, _, _ = env.run_agents(
            agent_pair, include_final_state=True, display=DISPLAY)

        print("H+R time taken: ", time_taken_hr)
        print("R+R time taken: ", time_taken_rr)
        self.assertGreater(time_taken_hr, time_taken_rr)
def get_bc_agent_from_model(model, bc_params, no_waits=False):
    mdp = OvercookedGridworld.from_layout_name(**bc_params["mdp_params"])
    mlp = MediumLevelPlanner.from_pickle_or_compute(mdp, NO_COUNTERS_PARAMS, force_compute=False)
    
    def encoded_state_policy(observations, include_waits=True, stochastic=False):
        action_probs_n = model.action_probability(observations)

        if not include_waits:
            # Drop the STAY ("wait") action and renormalize the remaining probabilities
            action_probs_n = ImitationAgentFromPolicy.remove_indices_and_renormalize(
                action_probs_n, [Action.ACTION_TO_INDEX[Action.STAY]])

        if stochastic:
            return [np.random.choice(len(action_probs_n[i]), p=action_probs_n[i])
                    for i in range(len(action_probs_n))]
        return action_probs_n

    def state_policy(mdp_states, agent_indices, include_waits, stochastic=False):
        # encode_fn = lambda s: mdp.preprocess_observation(s)
        encode_fn = lambda s: mdp.featurize_state(s, mlp)

        obs = []
        for agent_idx, s in zip(agent_indices, mdp_states):
            ob = encode_fn(s)[agent_idx]
            obs.append(ob)
        obs = np.array(obs)
        action_probs = encoded_state_policy(obs, include_waits, stochastic)
        return action_probs

    return ImitationAgentFromPolicy(state_policy, encoded_state_policy, no_waits=no_waits, mlp=mlp)
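A minimal usage sketch for the helper above; `DummyBCModel` is a hypothetical stand-in showing the interface `get_bc_agent_from_model` expects (an `action_probability(observations)` method), and the `bc_params` contents are illustrative, not the repo's actual training config.

import numpy as np

class DummyBCModel:
    """Hypothetical stand-in for a trained BC model: uniform probabilities over the 6 Overcooked actions."""
    def action_probability(self, observations):
        return np.ones((len(observations), 6)) / 6.0

bc_params = {"mdp_params": {"layout_name": "cramped_room"}}  # illustrative only
bc_agent = get_bc_agent_from_model(DummyBCModel(), bc_params, no_waits=False)
bc_agent.set_agent_index(0)  # which player slot this agent controls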
 def test_scenario_1(self):
     # Myopic corridor collision
     #
     # X X X X X O X D X X X X X
     # X   ↓Ho     X           X
     # X     X X X X X X X ↓R  X
     # X                       X
     # X S X X X X X X X X P P X
     #
     # H is on the left with an onion, farther from the tunnel entrance than R.
     # The optimal planner tells R to go first and assumes that H will wait
     # for R to pass. H, however, starts going through the tunnel
     # and they get stuck. The H plan is a bit extreme (it would probably
     # realize that it should retrace its steps at some point).
     scenario_1_mdp = OvercookedGridworld.from_layout_name(
         'small_corridor', start_order_list=['any'], cook_time=5)
     mlp = MediumLevelPlanner.from_pickle_or_compute(
         scenario_1_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
     a0 = GreedyHumanModel(mlp)
     a1 = CoupledPlanningAgent(mlp)
     agent_pair = AgentPair(a0, a1)
     start_state = OvercookedState(
         [P((2, 1), s, Obj('onion', (2, 1))),
          P((10, 2), s)], {},
         order_list=['onion'])
     env = OvercookedEnv.from_mdp(scenario_1_mdp,
                                  start_state_fn=lambda: start_state)
     env.run_agents(agent_pair, include_final_state=True, display=DISPLAY)
Example #4
 def setUp(self):
     self.base_mdp = OvercookedGridworld.from_layout_name("cramped_room")
     self.mlp = MediumLevelPlanner.from_pickle_or_compute(
         self.base_mdp, NO_COUNTERS_PARAMS, force_compute=True)
     self.env = OvercookedEnv(self.base_mdp, **DEFAULT_ENV_PARAMS)
     self.rnd_agent_pair = AgentPair(GreedyHumanModel(self.mlp),
                                     GreedyHumanModel(self.mlp))
     np.random.seed(0)
Example #5
 def mlp(self):
     assert not self.variable_mdp, "Variable mdp is not currently supported for planning"
     if self._mlp is None:
         if self.debug: print("Computing Planner")
         self._mlp = MediumLevelPlanner.from_pickle_or_compute(
             self.env.mdp,
             self.mlp_params,
             force_compute=self.force_compute)
     return self._mlp
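The accessor above lazily computes and caches the planner; below is a self-contained sketch of the same pattern using a hypothetical holder class (in the source this is presumably exposed as a property on the surrounding agent/runner class).

class PlannerHolder:
    """Hypothetical holder illustrating the lazy compute-and-cache pattern above."""
    def __init__(self, mdp, mlp_params, force_compute=False, debug=False):
        self.mdp = mdp
        self.mlp_params = mlp_params
        self.force_compute = force_compute
        self.debug = debug
        self._mlp = None

    @property
    def mlp(self):
        # Compute the MediumLevelPlanner only on first access, then reuse the cached instance
        if self._mlp is None:
            if self.debug:
                print("Computing Planner")
            self._mlp = MediumLevelPlanner.from_pickle_or_compute(
                self.mdp, self.mlp_params, force_compute=self.force_compute)
        return self._mlp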
def init_gym_env(bc_params):
    env_setup_params = copy.deepcopy(bc_params)
    del env_setup_params["data_params"]  # Not necessary for setting up env
    mdp = OvercookedGridworld.from_layout_name(**bc_params["mdp_params"])
    env = OvercookedEnv(mdp, **bc_params["env_params"])
    gym_env = gym.make("Overcooked-v0")
    
    mlp = MediumLevelPlanner.from_pickle_or_compute(mdp, NO_COUNTERS_PARAMS, force_compute=False)
    gym_env.custom_init(env, featurize_fn=lambda x: mdp.featurize_state(x, mlp))
    return gym_env
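A hedged sketch of the `bc_params` shape `init_gym_env` expects; the key names are inferred from the accesses above ("data_params" is deleted, "mdp_params" and "env_params" are forwarded to the constructors) and the values are illustrative. It assumes the Overcooked-v0 gym environment is registered, as the snippet above does.

bc_params = {
    "data_params": {},  # only consumed by other BC code paths
    "mdp_params": {"layout_name": "cramped_room"},
    "env_params": {"horizon": 400},
}
gym_env = init_gym_env(bc_params)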
Example #7
def joint_state_trajectory_to_single(trajectories, joint_traj_data, traj_metadata, player_indices_to_convert=None, processed=True):
    """
    Take a joint trajectory and split it into single-agent trajectories, appending the
    resulting data to the `trajectories` dictionary.

    player_indices_to_convert: indices of the players whose trajectories should be extracted
    """
    from overcooked_ai_py.planning.planners import MediumLevelPlanner, NO_COUNTERS_PARAMS

    mdp = traj_metadata['mdp']
    mlp = MediumLevelPlanner.from_pickle_or_compute(
            mdp=mdp,
            mlp_params=NO_COUNTERS_PARAMS,
            force_compute=False
    )

    assert len(joint_traj_data['ep_observations']) == 1, "This method only takes in one trajectory"
    states, joint_actions = joint_traj_data['ep_observations'][0], joint_traj_data['ep_actions'][0]
    rewards, length = joint_traj_data['ep_rewards'][0], joint_traj_data['ep_lengths'][0]

    # Getting trajectory for each agent
    for agent_idx in player_indices_to_convert:

        ep_obs, ep_acts, ep_dones = [], [], []
        for i in range(len(states)):
            state, action = states[i], joint_actions[i][agent_idx]
            
            if processed:
                # Pre-processing (default is state featurization)
                action = np.array([Action.ACTION_TO_INDEX[action]]).astype(int)

                # NOTE: Could parallelize a bit more if slow
                # state = mdp.preprocess_observation(state)[agent_idx]
                state = mdp.featurize_state(state, mlp)[agent_idx]

            ep_obs.append(state)
            ep_acts.append(action)
            ep_dones.append(False)

        if len(ep_obs) == 0:
            worker_id, layout_name = traj_metadata['workerid_num'], mdp.layout_name
            print("{} on layout {} had an empty traj. Excluding from dataset.".format(worker_id, layout_name))
            continue

        ep_dones[-1] = True

        trajectories["ep_observations"].append(ep_obs)
        trajectories["ep_actions"].append(ep_acts)
        trajectories["ep_dones"].append(ep_dones)
        trajectories["ep_rewards"].append(rewards)
        trajectories["ep_returns"].append(sum(rewards))
        trajectories["ep_lengths"].append(length)
        trajectories["ep_agent_idxs"].append(agent_idx)
        trajectories["mdp_params"].append(mdp.mdp_params)
        trajectories["env_params"].append({})
Example #8
def ppo_run(params):

    create_dir_if_not_exists(params["SAVE_DIR"])
    save_pickle(params, params["SAVE_DIR"] + "config")

    #############
    # PPO SETUP #
    #############

    train_infos = []

    for seed in params["SEEDS"]:
        reset_tf()
        set_global_seed(seed)

        curr_seed_dir = params["SAVE_DIR"] + "seed" + str(seed) + "/"
        create_dir_if_not_exists(curr_seed_dir)

        save_pickle(params, curr_seed_dir + "config")

        print("Creating env with params", params)
        # Configure mdp
        
        mdp = OvercookedGridworld.from_layout_name(**params["mdp_params"])
        env = OvercookedEnv(mdp, **params["env_params"])
        mlp = MediumLevelPlanner.from_pickle_or_compute(mdp, NO_COUNTERS_PARAMS, force_compute=True) 

        # Configure gym env
        gym_env = get_vectorized_gym_env(
            env, 'Overcooked-v0', featurize_fn=lambda x: mdp.lossless_state_encoding(x), **params
        )
        gym_env.self_play_randomization = 0 if params["SELF_PLAY_HORIZON"] is None else 1
        gym_env.trajectory_sp = params["TRAJECTORY_SELF_PLAY"]
        gym_env.update_reward_shaping_param(1 if params["mdp_params"]["rew_shaping_params"] != 0 else 0)

        configure_other_agent(params, gym_env, mlp, mdp)

        # Create model
        with tf.device('/device:GPU:{}'.format(params["GPU_ID"])):
            model = create_model(gym_env, "ppo_agent", **params)

        # Train model
        params["CURR_SEED"] = seed
        train_info = update_model(gym_env, model, **params)
        
        # Save model
        save_ppo_model(model, curr_seed_dir + model.agent_name)
        print("Saved training info at", curr_seed_dir + "training_info")
        save_pickle(train_info, curr_seed_dir + "training_info")
        train_infos.append(train_info)
    
    return train_infos
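A hedged sketch of the configuration keys `ppo_run` itself reads; the helpers it calls via `**params` (get_vectorized_gym_env, create_model, update_model) consume further keys omitted here, and all values below are illustrative.

params = {
    "SAVE_DIR": "ppo_runs/example_run/",  # where configs, models, and training info are written
    "SEEDS": [0, 10],                     # one training run per seed
    "GPU_ID": 0,
    "mdp_params": {"layout_name": "cramped_room", "rew_shaping_params": None},
    "env_params": {"horizon": 400},
    "SELF_PLAY_HORIZON": None,            # disables self-play randomization above
    "TRAJECTORY_SELF_PLAY": True,
    # ...plus the PPO hyperparameters consumed by create_model/update_model...
}
train_infos = ppo_run(params)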
 def test_two_coupled_agents_coupled_pair(self):
     mlp_simple = MediumLevelPlanner.from_pickle_or_compute(
         simple_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
     cp_agent = CoupledPlanningAgent(mlp_simple)
     agent_pair = CoupledPlanningPair(cp_agent)
     start_state = OvercookedState(
         [P((2, 2), n), P((2, 1), n)], {},
         order_list=['any'])
     env = OvercookedEnv(simple_mdp, start_state_fn=lambda: start_state)
     trajectory, time_taken, _, _ = env.run_agents(agent_pair,
                                                   include_final_state=True,
                                                   display=DISPLAY)
     end_state = trajectory[-1][0]
     self.assertEqual(end_state.order_list, [])
Example #10
 def test_two_greedy_human_open_map(self):
     scenario_2_mdp = OvercookedGridworld.from_layout_name(
         'scenario2', start_order_list=['any'], cook_time=5)
     mlp = MediumLevelPlanner.from_pickle_or_compute(
         scenario_2_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute)
     a0 = GreedyHumanModel(mlp)
     a1 = GreedyHumanModel(mlp)
     agent_pair = AgentPair(a0, a1)
     start_state = OvercookedState(
         [P((8, 1), s), P((1, 1), s)], {},
         order_list=['onion'])
     env = OvercookedEnv.from_mdp(scenario_2_mdp,
                                  start_state_fn=lambda: start_state,
                                  horizon=100)
     trajectory, time_taken, _, _ = env.run_agents(agent_pair,
                                                   include_final_state=True,
                                                   display=DISPLAY)
     end_state = trajectory[-1][0]
     self.assertEqual(len(end_state.order_list), 0)
Example #11
 def setUp(self):
     self.mlp_large = MediumLevelPlanner.from_pickle_or_compute(
         large_mdp, NO_COUNTERS_PARAMS, force_compute=force_compute_large)
simple_mdp = OvercookedGridworld.from_layout_name('simple_tomato',
                                                  start_order_list=['any'],
                                                  cook_time=5)

base_params = {
    'start_orientations': False,
    'wait_allowed': False,
    'counter_goals': simple_mdp.terrain_pos_dict['X'],
    'counter_drop': simple_mdp.terrain_pos_dict['X'][1:2],
    'counter_pickup': [],
    'same_motion_goals': True
}
action_manger_filename = "simple_1_am.pkl"
ml_planner_simple = MediumLevelPlanner.from_pickle_or_compute(
    simple_mdp,
    mlp_params=base_params,
    custom_filename=action_manger_filename,
    force_compute=force_compute)
ml_planner_simple.env = OvercookedEnv(simple_mdp)

base_params_start_or = {
    'start_orientations': True,
    'wait_allowed': False,
    'counter_goals': simple_mdp.terrain_pos_dict['X'],
    'counter_drop': [],
    'counter_pickup': [],
    'same_motion_goals': False
}
action_manger_filename = "simple_2_am.pkl"
or_ml_planner_simple = MediumLevelPlanner.from_pickle_or_compute(
    simple_mdp,