Exemplo n.º 1
0
def make_env(n_substeps=5, horizon=250, deterministic_mode=False,
             n_agents=2, n_boxes=2, n_ramps=1):
    """Assemble a bare-bones multi-agent environment from the modules framework.

    Nothing calls this function; it is kept purely as a minimal example of
    how modules and wrappers are combined into an environment.
    """
    # Observation keys handed to the split/select wrappers further down.
    self_keys = ['agent_qpos_qvel']
    self_mask_keys = ['mask_aa_obs']
    external_keys = ['agent_qpos_qvel']
    external_mask_keys = []

    world = Base(n_agents=n_agents, n_substeps=n_substeps,
                 horizon=horizon, deterministic_mode=deterministic_mode)

    # Modules are registered in the order: walls, boxes, ramps, agents.
    walls = RandomWalls(grid_size=30, num_rooms=4, min_room_size=6, door_size=2)
    world.add_module(walls)
    if n_boxes > 0:
        world.add_module(Boxes(n_boxes=n_boxes))
    if n_ramps > 0:
        world.add_module(Ramps(n_ramps=n_ramps))
    world.add_module(Agents(n_agents))
    world.reset()

    # Wrapper stack; each wrapper receives the result of the previous one.
    wrapped = SplitMultiAgentActions(world)
    wrapped = DiscretizeActionWrapper(wrapped, 'action_movement')
    wrapped = AgentAgentObsMask2D(wrapped)
    wrapped = SplitObservations(wrapped, self_keys + self_mask_keys)
    wrapped = SelectKeysWrapper(
        wrapped,
        keys_self=self_keys,
        keys_external=external_keys,
        keys_mask=self_mask_keys + external_mask_keys,
        flatten=False,
    )
    return DiscardMujocoExceptionEpisodes(wrapped)
def BoxLockingEnv(args, n_substeps=15, horizon=120, deterministic_mode=False,
                  floor_size=6.0, grid_size=30, door_size=2,
                  n_agents=1, fixed_agent_spawn=False,
                  lock_box=True, grab_box=True, grab_selective=False,
                  lock_type='all_lock_team_specific',
                  lock_grab_radius=0.25, grab_exclusive=False, grab_out_of_vision=False,
                  lock_out_of_vision=False,
                  box_floor_friction=0.2, other_friction=0.01, gravity=[0, 0, -50],
                  action_lims=(-0.9, 0.9), polar_obs=True,
                  scenario='quadrant', p_door_dropout=0.5,
                  n_rooms=2, random_room_number=False,
                  n_lidar_per_agent=0, visualize_lidar=False, compress_lidar_scale=None,
                  n_boxes=3, box_size=0.5, box_only_z_rot=True,
                  boxid_obs=False, boxsize_obs=False, pad_ramp_size=True, additional_obs={},
                  # lock-box task
                  task_type='order-return', lock_reward=5.0, unlock_penalty=5.0, shaped_reward_scale=0.5,
                  return_threshold=0.1,
                  # ramps
                  n_ramps=0):
    """Build the box-locking environment: world modules plus the wrapper stack.

    The keyword arguments ``scenario``, ``n_agents``, ``task_type`` and
    ``n_boxes`` are always overwritten by the corresponding attributes of
    ``args``; values passed for them by keyword have no effect.

    Args:
        args: configuration object; the attributes ``scenario_name``,
            ``num_agents``, ``task_type`` and ``num_boxes`` are read.
        scenario (after override): one of ``'randomwalls'``, ``'quadrant'``,
            ``'empty'``, or any string containing ``'var_tri'``.
        lock_grab_radius, box_size: their ratio is used as the radius
            multiplier for both the lock and the grab wrappers.
        additional_obs: mapping of constant observations added to the
            environment; its keys are appended to the external keys, and the
            keys containing ``'mask'`` to the external mask keys.

        The remaining keyword arguments are forwarded to the modules and
        wrappers constructed below.

    Returns:
        The fully wrapped environment.

    Raises:
        ValueError: if the scenario is not one of the supported values.
    """
    # Values from ``args`` take precedence over the keyword arguments.
    scenario = args.scenario_name
    n_agents = args.num_agents
    task_type = args.task_type
    n_boxes = args.num_boxes
    #assert n_agents==1, ("only 1 agents is supported, check the config.py.")

    # Box presence is tested through np.max(n_boxes) throughout this function;
    # use the same value when enumerating box bodies so the two always agree
    # (range(n_boxes) fails for anything that is not a plain integer).
    max_boxes = np.max(n_boxes)
    has_boxes = max_boxes > 0
    box_body_names = [f'moveable_box{i}' for i in range(max_boxes)]

    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    env = Base(n_agents=n_agents, n_substeps=n_substeps,
               floor_size=floor_size,
               horizon=horizon, action_lims=action_lims, deterministic_mode=deterministic_mode,
               grid_size=grid_size)

    # Scenario selection: wall layout plus placement functions for each entity.
    if scenario == 'randomwalls':
        env.add_module(RandomWalls(n_agents=n_agents, grid_size=grid_size, num_rooms=n_rooms,
                                   random_room_number=random_room_number,
                                   min_room_size=6, door_size=door_size,
                                   gen_door_obs=False))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        agent_placement_fn = uniform_placement if not fixed_agent_spawn else center_placement
    elif scenario == 'quadrant':
        env.add_module(WallScenarios(n_agents=n_agents, grid_size=grid_size, door_size=door_size,
                                     scenario=scenario, friction=other_friction,
                                     p_door_dropout=p_door_dropout))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        agent_placement_fn = quadrant_placement if not fixed_agent_spawn else center_placement
    elif scenario == 'empty':
        env.add_module(WallScenarios(n_agents=n_agents, grid_size=grid_size, door_size=2, scenario='empty'))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        agent_placement_fn = center_placement
    elif 'var_tri' in scenario:
        env.add_module(WallScenarios(n_agents=n_agents, grid_size=grid_size, door_size=door_size, scenario='var_tri'))
        ramp_placement_fn = [tri_placement(i % 3) for i in range(n_ramps)]
        agent_placement_fn = center_placement if fixed_agent_spawn else \
            (uniform_placement if 'uniform' in scenario else rotate_tri_placement)
        box_placement_fn = uniform_placement if 'uniform' in scenario else rotate_tri_placement
    else:
        raise ValueError(f"Scenario {scenario} not supported.")

    env.add_module(Agents(n_agents,
                          placement_fn=agent_placement_fn,
                          color=[np.array((66., 235., 244., 255.)) / 255] * n_agents,
                          friction=other_friction,
                          polar_obs=polar_obs))
    if has_boxes:
        env.add_module(Boxes(n_boxes=n_boxes, placement_fn=box_placement_fn,
                             friction=box_floor_friction, polar_obs=polar_obs,
                             n_elongated_boxes=0,
                             boxid_obs=boxid_obs,
                             box_only_z_rot=box_only_z_rot,
                             boxsize_obs=boxsize_obs))

    if n_ramps > 0:
        env.add_module(Ramps(n_ramps=n_ramps, placement_fn=ramp_placement_fn,
                             friction=other_friction, polar_obs=polar_obs,
                             pad_ramp_size=pad_ramp_size))

    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(LidarSites(n_agents=n_agents, n_lidar_per_agent=n_lidar_per_agent))

    if has_boxes and grab_box:
        env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()

    # 'vector_door_obs' is only selected for scenarios other than var_tri/randomwalls.
    if 'var_tri' in scenario or "randomwalls" in scenario:
        keys_self = ['agent_qpos_qvel', 'current_step']
    else:
        keys_self = ['agent_qpos_qvel', 'current_step', 'vector_door_obs']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel']
    keys_copy = ['you_lock', 'team_lock']
    keys_mask_external = []

    env = SplitMultiAgentActions(env)
    # All agents are placed on team 0.
    env = TeamMembership(env, np.zeros((n_agents,)))
    env = AgentAgentObsMask2D(env)
    env = DiscretizeActionWrapper(env, 'action_movement')
    env = NumpyArrayRewardWrapper(env)
    env = TimeWrapper(env, horizon)
    if has_boxes:
        env = AgentGeomObsMask2D(env, pos_obs_key='box_pos', mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if lock_box and has_boxes:
        env = LockObjWrapper(env, body_names=box_body_names,
                             agent_idx_allowed_to_lock=np.arange(n_agents),
                             lock_type=lock_type,
                             radius_multiplier=lock_radius_multiplier,
                             obj_in_game_metadata_keys=["curr_n_boxes"],
                             agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ab_obs"])

    if n_ramps > 0:
        env = AgentGeomObsMask2D(env, pos_obs_key='ramp_pos', mask_obs_key='mask_ar_obs',
                                 geom_idxs_obs_key='ramp_geom_idxs')
        env = LockObjWrapper(env, body_names=[f"ramp{i}:ramp" for i in range(n_ramps)],
                             agent_idx_allowed_to_lock=np.arange(n_agents),
                             lock_type=lock_type, ac_obs_prefix='ramp_',
                             radius_multiplier=lock_radius_multiplier,
                             agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ar_obs"])

        keys_external += ['ramp_obs']
        keys_mask_external += ['mask_ar_obs']
        keys_copy += ['ramp_you_lock', 'ramp_team_lock']

    if grab_box and has_boxes:
        body_names = box_body_names + [f"ramp{i}:ramp" for i in range(n_ramps)]
        obj_in_game_meta_keys = ['curr_n_boxes'] + (['curr_n_ramps'] if n_ramps > 0 else [])
        env = GrabObjWrapper(env,
                             body_names=body_names,
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=obj_in_game_meta_keys)

    if n_lidar_per_agent > 0:
        env = Lidar(env, n_lidar_per_agent=n_lidar_per_agent, visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']

    env = AddConstantObservationsWrapper(env, new_obs=additional_obs)
    keys_external += list(additional_obs)
    keys_mask_external += [ob for ob in additional_obs if 'mask' in ob]

    #############################################
    # lock Box Task Reward
    ###
    env = LockObjectsTask(env, n_objs=n_boxes, task=task_type, fixed_order=True,
                          obj_lock_obs_key='obj_lock', obj_pos_obs_key='box_pos',
                          act_lock_key='action_glue', agent_pos_key='agent_pos',
                          lock_reward=lock_reward, unlock_penalty=unlock_penalty,
                          shaped_reward_scale=shaped_reward_scale,
                          return_threshold=return_threshold)
    ###
    #############################################

    env = SplitObservations(env, keys_self + keys_mask_self, keys_copy=keys_copy)
    env = SpoofEntityWrapper(env, max_boxes,
                             ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                             ['mask_ab_obs'])
    keys_mask_external += ['mask_ab_obs_spoof']

    if n_agents < 2:
        env = SpoofEntityWrapper(env, 1, ['agent_qpos_qvel'], ['mask_aa_obs'])

    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box:
        # Can only pull if in vision
        mask_keys = ['mask_ab_obs'] + (['mask_ar_obs'] if n_ramps > 0 else [])
        env = MaskActionWrapper(env, action_key='action_pull', mask_keys=mask_keys)
    if not grab_selective and grab_box:
        env = GrabClosestWrapper(env)
    env = DiscardMujocoExceptionEpisodes(env, n_agents)

    # Lock observations are concatenated onto the entity observations; the
    # ramp entry is only present when ramps exist.
    concat_spec = {'agent_qpos_qvel': ['agent_qpos_qvel'],
                   'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock']}
    if n_ramps > 0:
        concat_spec['ramp_obs'] = ['ramp_obs', 'ramp_you_lock', 'ramp_team_lock',
                                   'ramp_obj_lock']
    env = ConcatenateObsWrapper(env, concat_spec)

    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_external=keys_external,
                            keys_mask=keys_mask_self + keys_mask_external,
                            flatten=False)

    return env
Exemplo n.º 3
0
def HideAndSeekEnv(n_substeps=15,
                   horizon=80,
                   deterministic_mode=False,
                   floor_size=6.0,
                   grid_size=30,
                   door_size=2,
                   n_hiders=2,
                   n_seekers=2,
                   max_n_agents=None,
                   n_boxes=2,
                   n_ramps=1,
                   n_elongated_boxes=0,
                   rand_num_elongated_boxes=False,
                   n_min_boxes=None,
                   box_size=2.0,
                   boxid_obs=False,
                   box_only_z_rot=True,
                   rew_type='joint_zero_sum',
                   lock_box=True,
                   grab_box=True,
                   lock_ramp=False,
                   lock_type='all_lock_team_specific',
                   lock_grab_radius=0.25,
                   lock_out_of_vision=True,
                   grab_exclusive=False,
                   grab_out_of_vision=False,
                   grab_selective=False,
                   box_floor_friction=0.2,
                   other_friction=0.01,
                   gravity=[0, 0, -50],
                   action_lims=(-0.9, 0.9),
                   polar_obs=True,
                   scenario='quadrant',
                   quadrant_game_hider_uniform_placement=False,
                   p_door_dropout=0.5,
                   n_rooms=4,
                   random_room_number=True,
                   prob_outside_walls=1.0,
                   n_lidar_per_agent=0,
                   visualize_lidar=False,
                   compress_lidar_scale=None,
                   hiders_together_radius=None,
                   seekers_together_radius=None,
                   prep_fraction=0.4,
                   prep_obs=True,
                   team_size_obs=False,
                   restrict_rect=[0.1, 0.1, 5.9, 5.9],
                   penalize_objects_out=True,
                   n_food=0,
                   food_radius=None,
                   food_respawn_time=None,
                   max_food_health=1,
                   food_together_radius=None,
                   food_rew_type='selfish',
                   eat_when_caught=False,
                   food_reward_scale=1.0,
                   food_normal_centered=False,
                   food_box_centered=False,
                   n_food_cluster=1):
    """Build the hide-and-seek environment: world modules plus the wrapper stack.

    Agents are indexed with the ``n_hiders`` hiders first, followed by the
    ``n_seekers`` seekers; hiders are assigned team 0 and seekers team 1.

    Args:
        scenario: ``'randomwalls'`` or ``'quadrant'``; selects the wall module
            and the placement functions for agents, boxes and ramps.
        n_boxes: box count passed to the ``Boxes`` module. Its maximum
            (``np.max``) decides whether box modules/wrappers are added and
            how many box bodies are enumerated.
        lock_grab_radius, box_size: their ratio is used as the radius
            multiplier for both the lock and the grab wrappers.
        hiders_together_radius, seekers_together_radius: only read in the
            ``'randomwalls'`` scenario; when set, the value converted to grid
            cells is stored in ``env.metadata`` and the remaining team members
            are placed with ``close_to_other_object_placement`` relative to
            the team's first agent.
        food_together_radius, n_food_cluster: when the radius is set, food is
            grouped into ``n_food_cluster`` equally sized clusters
            (``n_food`` must be divisible by ``n_food_cluster``).
        max_n_agents: when not None, agent observations are passed through
            ``SpoofEntityWrapper`` with this count.

        ``rand_num_elongated_boxes``, ``n_min_boxes`` and
        ``food_normal_centered`` are accepted but never read by this function.
        The remaining keyword arguments are forwarded to the modules and
        wrappers constructed below.

    Returns:
        The fully wrapped environment.

    Raises:
        ValueError: if ``scenario`` is not supported.
        AssertionError: if clustered food is requested and ``n_food`` is not a
            multiple of ``n_food_cluster``.
    """
    n_agents = n_hiders + n_seekers

    # Single source of truth for "are there boxes": the original mixed
    # np.max(n_boxes) > 0, `if n_boxes:` and `n_boxes > 0`, which disagree (or
    # raise) when n_boxes is a list/array rather than a plain integer.
    max_boxes = np.max(n_boxes)
    has_boxes = max_boxes > 0

    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    env = Base(n_agents=n_agents,
               n_substeps=n_substeps,
               horizon=horizon,
               floor_size=floor_size,
               grid_size=grid_size,
               action_lims=action_lims,
               deterministic_mode=deterministic_mode)

    if scenario == 'randomwalls':
        env.add_module(
            RandomWalls(n_agents=n_agents,
                        grid_size=grid_size,
                        num_rooms=n_rooms,
                        random_room_number=random_room_number,
                        min_room_size=6,
                        door_size=door_size,
                        prob_outside_walls=prob_outside_walls,
                        gen_door_obs=False))
        box_placement_fn = uniform_placement
        ramp_placement_fn = uniform_placement
        cell_size = env.floor_size / grid_size

        # Hiders: the first is placed uniformly, the rest optionally near it.
        first_hider_placement = uniform_placement
        if hiders_together_radius is not None:
            htr_in_cells = np.ceil(hiders_together_radius /
                                   cell_size).astype(int)

            env.metadata['hiders_together_radius'] = htr_in_cells

            close_to_first_hider_placement = close_to_other_object_placement(
                "agent", 0, "hiders_together_radius")

            agent_placement_fn = [first_hider_placement] + \
                                 [close_to_first_hider_placement] * (n_hiders - 1)
        else:
            agent_placement_fn = [first_hider_placement] * n_hiders

        # Seekers: same scheme, anchored on agent index n_hiders.
        first_seeker_placement = uniform_placement

        if seekers_together_radius is not None:
            str_in_cells = np.ceil(seekers_together_radius /
                                   cell_size).astype(int)

            env.metadata['seekers_together_radius'] = str_in_cells

            close_to_first_seeker_placement = close_to_other_object_placement(
                "agent", n_hiders, "seekers_together_radius")

            agent_placement_fn += [first_seeker_placement] + \
                                  [close_to_first_seeker_placement] * (n_seekers - 1)
        else:
            agent_placement_fn += [first_seeker_placement] * (n_seekers)

    elif scenario == 'quadrant':
        env.add_module(
            WallScenarios(n_agents=n_agents,
                          grid_size=grid_size,
                          door_size=door_size,
                          scenario=scenario,
                          friction=other_friction,
                          p_door_dropout=p_door_dropout))
        box_placement_fn = quadrant_placement
        ramp_placement_fn = nearwall_placement  #uniform_placement
        hider_placement = uniform_placement if quadrant_game_hider_uniform_placement else quadrant_placement
        agent_placement_fn = [hider_placement] * n_hiders + [
            outside_quadrant_placement
        ] * n_seekers
    else:
        raise ValueError(f"Scenario {scenario} not supported.")
    env.add_module(
        Agents(n_agents,
               placement_fn=agent_placement_fn,
               color=[np.array((66., 235., 244., 255.)) / 255] * n_hiders +
               [(1., 0., 0., 1.)] * n_seekers,
               friction=other_friction,
               polar_obs=polar_obs))

    if has_boxes:
        env.add_module(
            Boxes(n_boxes=n_boxes,
                  placement_fn=box_placement_fn,
                  friction=box_floor_friction,
                  polar_obs=polar_obs,
                  n_elongated_boxes=n_elongated_boxes,
                  boxid_obs=boxid_obs,
                  box_only_z_rot=box_only_z_rot))
    if n_ramps > 0:
        env.add_module(
            Ramps(n_ramps=n_ramps,
                  placement_fn=ramp_placement_fn,
                  friction=other_friction,
                  polar_obs=polar_obs,
                  pad_ramp_size=(np.max(n_elongated_boxes) > 0)))
    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(
            LidarSites(n_agents=n_agents,
                       n_lidar_per_agent=n_lidar_per_agent))

    if n_food > 0:
        if scenario == 'quadrant':
            first_food_placement = quadrant_placement
        elif food_box_centered:
            first_food_placement = uniform_placement_middle(0.25)
        else:
            first_food_placement = uniform_placement
        if food_together_radius is not None:
            cell_size = env.floor_size / grid_size
            ftr_in_cells = np.ceil(food_together_radius /
                                   cell_size).astype(int)

            env.metadata['food_together_radius'] = ftr_in_cells

            assert n_food % n_food_cluster == 0
            food_per_cluster = n_food // n_food_cluster
            # Each food item is assigned the index of the first item of its cluster.
            cluster_assignments = np.repeat(
                np.arange(0, n_food, food_per_cluster), food_per_cluster)
            food_placement = [
                close_to_other_object_placement("food", i,
                                                "food_together_radius")
                for i in cluster_assignments
            ]
            # The first item of every cluster uses the base placement instead.
            food_placement[::food_per_cluster] = [first_food_placement
                                                  ] * n_food_cluster
        else:
            food_placement = first_food_placement
        env.add_module(Food(n_food, placement_fn=food_placement))

    env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()
    keys_self = [
        'agent_qpos_qvel', 'hider', 'prep_obs', 'current_step',
        'vector_door_obs'
    ]
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel']
    keys_copy = ['you_lock', 'team_lock', 'ramp_you_lock', 'ramp_team_lock']
    keys_mask_external = []
    env = SplitMultiAgentActions(env)
    if team_size_obs:
        keys_self += ['team_size']
    env = TeamMembership(
        env, np.append(np.zeros((n_hiders, )), np.ones((n_seekers, ))))
    env = AgentAgentObsMask2D(env)
    # Constant per-agent flag: 1 for hiders, 0 for seekers.
    hider_obs = np.array([[1]] * n_hiders + [[0]] * n_seekers)
    env = AddConstantObservationsWrapper(env, new_obs={'hider': hider_obs})
    env = HideAndSeekRewardWrapper(env,
                                   n_hiders=n_hiders,
                                   n_seekers=n_seekers,
                                   rew_type=rew_type)
    env = TimeWrapper(env, horizon)
    if restrict_rect is not None:
        env = RestrictAgentsRect(env,
                                 restrict_rect=restrict_rect,
                                 penalize_objects_out=penalize_objects_out)
    env = PreparationPhase(env, prep_fraction=prep_fraction)
    env = DiscretizeActionWrapper(env, 'action_movement')
    if has_boxes:
        env = AgentGeomObsMask2D(env,
                                 pos_obs_key='box_pos',
                                 mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if n_food:
        env = AgentSiteObsMask2D(env,
                                 pos_obs_key='food_pos',
                                 mask_obs_key='mask_af_obs')
        env = FoodHealthWrapper(
            env,
            respawn_time=(np.inf
                          if food_respawn_time is None else food_respawn_time),
            eat_thresh=(np.inf if food_radius is None else food_radius),
            max_food_health=max_food_health,
            food_rew_type=food_rew_type,
            reward_scale=food_reward_scale)
        env = MaskActionWrapper(env, 'action_eat_food',
                                ['mask_af_obs'])  # Can only eat if in vision
        if prep_obs:
            env = MaskPrepPhaseAction(env, 'action_eat_food')
        if not eat_when_caught:
            env = MaskUnseenAction(env, 0, 'action_eat_food')
        # Only the hiders (agent indices 0..n_hiders-1) are passed as eaters.
        eat_agents = np.arange(n_hiders)
        env = AlwaysEatWrapper(env, agent_idx_allowed=eat_agents)
        keys_external += ['mask_af_obs', 'food_obs']
        keys_mask_external.append('mask_af_obs')
    if lock_box and has_boxes:
        env = LockObjWrapper(
            env,
            body_names=[f'moveable_box{i}' for i in range(max_boxes)],
            agent_idx_allowed_to_lock=np.arange(n_agents),
            lock_type=lock_type,
            radius_multiplier=lock_radius_multiplier,
            obj_in_game_metadata_keys=["curr_n_boxes"],
            agent_allowed_to_lock_keys=None
            if lock_out_of_vision else ["mask_ab_obs"])
    if n_ramps > 0:
        env = AgentGeomObsMask2D(env,
                                 pos_obs_key='ramp_pos',
                                 mask_obs_key='mask_ar_obs',
                                 geom_idxs_obs_key='ramp_geom_idxs')
        if lock_ramp:
            env = LockObjWrapper(
                env,
                body_names=[f'ramp{i}:ramp' for i in range(n_ramps)],
                agent_idx_allowed_to_lock=np.arange(n_agents),
                lock_type=lock_type,
                ac_obs_prefix='ramp_',
                radius_multiplier=lock_radius_multiplier,
                obj_in_game_metadata_keys=['curr_n_ramps'],
                agent_allowed_to_lock_keys=None
                if lock_out_of_vision else ["mask_ar_obs"])
        keys_external += ['ramp_obs']
        keys_mask_external.append('mask_ar_obs')
    if grab_box and (has_boxes or n_ramps > 0):
        env = GrabObjWrapper(
            env, [f'moveable_box{i}' for i in range(max_boxes)] +
            ([f"ramp{i}:ramp" for i in range(n_ramps)]),
            radius_multiplier=grab_radius_multiplier,
            grab_exclusive=grab_exclusive,
            obj_in_game_metadata_keys=['curr_n_boxes', 'curr_n_ramps'])

    if n_lidar_per_agent > 0:
        env = Lidar(env,
                    n_lidar_per_agent=n_lidar_per_agent,
                    visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']

    if prep_obs:
        env = TrackStatWrapper(env, max_boxes, n_ramps, n_food)
    env = SplitObservations(env,
                            keys_self + keys_mask_self,
                            keys_copy=keys_copy)
    if has_boxes:
        env = SpoofEntityWrapper(
            env, max_boxes,
            ['box_obs', 'you_lock', 'team_lock', 'obj_lock'], ['mask_ab_obs'])
    if n_food:
        env = SpoofEntityWrapper(env, n_food, ['food_obs'], ['mask_af_obs'])
    keys_mask_external += ['mask_ab_obs_spoof', 'mask_af_obs_spoof']
    if max_n_agents is not None:
        env = SpoofEntityWrapper(env, max_n_agents,
                                 ['agent_qpos_qvel', 'hider', 'prep_obs'],
                                 ['mask_aa_obs'])
    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box and has_boxes:
        env = MaskActionWrapper(env, 'action_pull', ['mask_ab_obs'] +
                                (['mask_ar_obs'] if n_ramps > 0 else []))
    if not grab_selective and grab_box and has_boxes:
        env = GrabClosestWrapper(env)
    # Seekers (agent indices n_hiders..n_agents-1) are passed to NoActionsInPrepPhase.
    env = NoActionsInPrepPhase(env, np.arange(n_hiders, n_agents))
    env = DiscardMujocoExceptionEpisodes(env, n_agents)

    # Concatenation spec: the agent entry is always present; box and ramp
    # entries are added only when those entities exist. Ramp lock observations
    # are included only when ramps can be locked.
    concat_spec = {
        'agent_qpos_qvel': ['agent_qpos_qvel', 'hider', 'prep_obs']
    }
    if has_boxes:
        concat_spec['box_obs'] = [
            'box_obs', 'you_lock', 'team_lock', 'obj_lock'
        ]
    if n_ramps > 0:
        concat_spec['ramp_obs'] = ['ramp_obs'] + (
            ['ramp_you_lock', 'ramp_team_lock', 'ramp_obj_lock']
            if lock_ramp else [])
    env = ConcatenateObsWrapper(env, concat_spec)

    env = SelectKeysWrapper(env,
                            keys_self=keys_self,
                            keys_external=keys_external,
                            keys_mask=keys_mask_self + keys_mask_external,
                            flatten=False)
    return env
Exemplo n.º 4
0
def BlueprintConstructionEnv(n_substeps=15,
                             horizon=200,
                             deterministic_mode=False,
                             floor_size=4.0,
                             grid_size=30,
                             n_agents=2,
                             n_rooms=2,
                             random_room_number=False,
                             scenario='empty',
                             door_size=2,
                             n_sites=2,
                             n_elongated_sites=0,
                             site_placement='uniform_away_from_walls',
                             reward_infos=[{
                                 'type': 'construction_dense'
                             }, {
                                 'type': 'construction_completed'
                             }],
                             n_boxes=4,
                             n_elongated_boxes=0,
                             n_min_boxes=None,
                             box_size=0.5,
                             box_only_z_rot=False,
                             lock_box=True,
                             grab_box=True,
                             grab_selective=False,
                             lock_grab_radius=0.25,
                             lock_type='all_lock_team_specific',
                             grab_exclusive=False,
                             grab_out_of_vision=True,
                             lock_out_of_vision=True,
                             box_floor_friction=0.2,
                             other_friction=0.01,
                             gravity=[0, 0, -50],
                             action_lims=(-0.9, 0.9),
                             polar_obs=True,
                             n_lidar_per_agent=0,
                             visualize_lidar=False,
                             compress_lidar_scale=None,
                             boxid_obs=False,
                             boxsize_obs=False,
                             team_size_obs=False,
                             additional_obs={}):

    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    if type(n_sites) not in [list, np.ndarray]:
        n_sites = [n_sites, n_sites]

    env = Base(n_agents=n_agents,
               n_substeps=n_substeps,
               horizon=horizon,
               floor_size=floor_size,
               grid_size=grid_size,
               action_lims=action_lims,
               deterministic_mode=deterministic_mode)

    if scenario == 'randomwalls':
        env.add_module(
            RandomWalls(n_agents=n_agents,
                        grid_size=grid_size,
                        num_rooms=n_rooms,
                        random_room_number=random_room_number,
                        min_room_size=6,
                        door_size=door_size,
                        gen_door_obs=False))
    elif scenario == 'empty':
        env.add_module(
            WallScenarios(n_agents=n_agents,
                          grid_size=grid_size,
                          door_size=door_size,
                          scenario='empty',
                          friction=other_friction))
    else:
        raise ValueError(f"Scenario {scenario} not supported.")

    env.add_module(
        Agents(n_agents,
               placement_fn=uniform_placement,
               color=[np.array((66., 235., 244., 255.)) / 255] * n_agents,
               friction=other_friction,
               polar_obs=polar_obs))
    if np.max(n_boxes) > 0:
        env.add_module(
            Boxes(n_boxes=n_boxes,
                  placement_fn=uniform_placement,
                  friction=box_floor_friction,
                  polar_obs=polar_obs,
                  n_elongated_boxes=n_elongated_boxes,
                  boxid_obs=boxid_obs,
                  boxsize_obs=boxsize_obs,
                  box_size=box_size,
                  box_only_z_rot=box_only_z_rot,
                  mark_box_corners=True))
    if n_sites[1] > 0:
        if site_placement == 'center':
            site_placement_fn = center_placement
        elif site_placement == 'uniform':
            site_placement_fn = uniform_placement
        elif site_placement == 'uniform_away_from_walls':
            site_placement_fn = uniform_placement_middle(0.85)
        else:
            raise ValueError(
                f'Site placement option: {site_placement} not implemented.'
                ' Please choose from center, uniform and uniform_away_from_walls.'
            )

        env.add_module(
            ConstructionSites(n_sites,
                              placement_fn=site_placement_fn,
                              site_size=box_size,
                              site_height=box_size / 2,
                              n_elongated_sites=n_elongated_sites))
    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(
            LidarSites(n_agents=n_agents, n_lidar_per_agent=n_lidar_per_agent))
    if np.max(n_boxes) > 0 and grab_box:
        env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()
    keys_self = ['agent_qpos_qvel', 'current_step']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel', 'construction_site_obs']
    keys_copy = ['you_lock', 'team_lock', 'ramp_you_lock', 'ramp_team_lock']
    keys_mask_external = []

    env = AddConstantObservationsWrapper(env, new_obs=additional_obs)
    keys_external += list(additional_obs)
    keys_mask_external += [ob for ob in additional_obs if 'mask' in ob]

    env = SplitMultiAgentActions(env)
    if team_size_obs:
        keys_self += ['team_size']
    env = TeamMembership(env, np.zeros((n_agents, )))
    env = AgentAgentObsMask2D(env)
    env = DiscretizeActionWrapper(env, 'action_movement')
    if np.max(n_boxes) > 0:
        env = AgentGeomObsMask2D(env,
                                 pos_obs_key='box_pos',
                                 mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if lock_box and np.max(n_boxes) > 0:
        agent_allowed_to_lock_keys = None if lock_out_of_vision else [
            "mask_ab_obs"
        ]
        env = LockObjWrapper(
            env,
            body_names=[f'moveable_box{i}' for i in range(n_boxes)],
            agent_idx_allowed_to_lock=np.arange(n_agents),
            lock_type=lock_type,
            radius_multiplier=lock_radius_multiplier,
            obj_in_game_metadata_keys=["curr_n_boxes"],
            agent_allowed_to_lock_keys=agent_allowed_to_lock_keys)
    if grab_box and np.max(n_boxes) > 0:
        env = GrabObjWrapper(env, [f'moveable_box{i}' for i in range(n_boxes)],
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=['curr_n_boxes'])

    if n_lidar_per_agent > 0:
        env = Lidar(env,
                    n_lidar_per_agent=n_lidar_per_agent,
                    visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']

    env = ConstructionDistancesWrapper(env)
    env = NumpyArrayRewardWrapper(env)

    reward_wrappers = {
        'construction_dense': ConstructionDenseRewardWrapper,
        'construction_completed': ConstructionCompletedRewardWrapper,
    }

    for rew_info in reward_infos:
        rew_type = rew_info['type']
        del rew_info['type']
        env = reward_wrappers[rew_type](env, **rew_info)
    env = TimeWrapper(env, horizon)
    env = SplitObservations(env,
                            keys_self + keys_mask_self,
                            keys_copy=keys_copy)
    if n_agents == 1:
        env = SpoofEntityWrapper(env, 2, ['agent_qpos_qvel'], ['mask_aa_obs'])
    env = SpoofEntityWrapper(env, n_boxes,
                             ['box_obs', 'you_lock', 'team_lock', 'obj_lock'],
                             ['mask_ab_obs'])
    env = SpoofEntityWrapper(env, n_sites[1], ['construction_site_obs'],
                             ['mask_acs_obs'])
    keys_mask_external += ['mask_ab_obs_spoof', 'mask_acs_obs_spoof']
    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box:
        env = MaskActionWrapper(env, 'action_pull',
                                ['mask_ab_obs'])  # Can only pull if in vision
    if not grab_selective and grab_box:
        env = GrabClosestWrapper(env)
    env = DiscardMujocoExceptionEpisodes(env, n_agents)
    env = ConcatenateObsWrapper(
        env, {
            'agent_qpos_qvel': ['agent_qpos_qvel'],
            'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock']
        })
    env = SelectKeysWrapper(env,
                            keys_self=keys_self,
                            keys_external=keys_external,
                            keys_mask=keys_mask_self + keys_mask_external,
                            flatten=False)
    return env
def ShelterConstructionEnv(args, n_substeps=15, horizon=80, deterministic_mode=False,
             floor_size=6.0, grid_size=30,
             n_agents=1,
             objective_diameter=[1, 1], objective_placement='center',
             num_rays_per_side=25, shelter_reward_scale=1,
             n_boxes=2, n_elongated_boxes=0,
             box_size=0.5, box_only_z_rot=False,
             lock_box=True, grab_box=True, grab_selective=False, lock_grab_radius=0.25,
             lock_type='any_lock_specific', grab_exclusive=False,
             grab_out_of_vision=False, lock_out_of_vision=True,
             box_floor_friction=0.2, other_friction=0.01, gravity=[0, 0, -50],
             action_lims=(-0.9, 0.9), polar_obs=True,
             n_lidar_per_agent=0, visualize_lidar=False, compress_lidar_scale=None,
             boxid_obs=True, boxsize_obs=True, team_size_obs=False, additional_obs={}):
    """Build the single-agent shelter-construction environment.

    The environment is a ``Base`` env with an empty ``WallScenarios`` layout,
    one static ``Cylinders`` objective, the agent, optional boxes and lidar
    sites, wrapped with a ``ShelterRewardWrapper`` and the observation /
    action wrappers applied below.

    Args:
        args: Unused by this function; kept for signature compatibility.
        n_substeps, horizon, floor_size, grid_size, action_lims,
            deterministic_mode: Forwarded to ``Base``. ``horizon`` is also
            passed to ``TimeWrapper``.
        n_agents: Must be 1.
        objective_diameter: Forwarded to ``Cylinders`` as ``diameter``.
        objective_placement: ``'center'`` or ``'uniform_away_from_walls'``;
            selects the placement function of the objective cylinder.
        num_rays_per_side, shelter_reward_scale: Forwarded to
            ``ShelterRewardWrapper``.
        n_boxes: Forwarded unchanged to ``Boxes``. Its maximum
            (``np.max(n_boxes)``) decides whether box modules/wrappers are
            added and how many ``moveable_box{i}`` bodies are registered.
        n_elongated_boxes, box_size, box_only_z_rot, boxid_obs, boxsize_obs:
            Forwarded to ``Boxes``. ``box_size`` is also the cylinder height.
        lock_box, grab_box: Enable ``LockObjWrapper`` / ``GrabObjWrapper``
            when there is at least one box.
        grab_selective: When false (and ``grab_box``), ``GrabClosestWrapper``
            is applied.
        lock_grab_radius: Divided by ``box_size`` to obtain the radius
            multipliers given to the lock and grab wrappers.
        lock_type, grab_exclusive: Forwarded to the lock / grab wrappers.
        grab_out_of_vision: When false (and ``grab_box``), ``action_pull`` is
            masked by ``mask_ab_obs``.
        lock_out_of_vision: When false, locking is restricted by
            ``mask_ab_obs``.
        box_floor_friction: Friction for ``Boxes``; also used for
            ``FloorAttributes`` unless ``None``.
        other_friction: Friction for the walls and the agent.
        gravity: Forwarded to ``WorldConstants``.
        polar_obs: Forwarded to ``Agents`` and ``Boxes``.
        n_lidar_per_agent, visualize_lidar, compress_lidar_scale: Lidar
            configuration; lidar is enabled when ``n_lidar_per_agent > 0``.
        team_size_obs: Adds ``'team_size'`` to the self-observation keys.
        additional_obs: Constant observations added through
            ``AddConstantObservationsWrapper``; its keys are exposed as
            external observations, and keys containing ``'mask'`` are also
            treated as mask keys.

    Returns:
        The fully wrapped environment (outermost wrapper is
        ``SelectKeysWrapper``).

    Raises:
        AssertionError: If ``n_agents`` is not 1.
        ValueError: If ``objective_placement`` is not a supported option.
    """
    # NOTE(review): objective_diameter, gravity and additional_obs have
    # mutable defaults shared across calls. This function does not mutate
    # them itself; confirm the modules/wrappers receiving them do not either.
    assert n_agents==1, ("only 1 agents is supported, check the config.py.")

    # Fail fast with a descriptive error instead of an UnboundLocalError when
    # the placement function is first used further down.
    if objective_placement not in ('center', 'uniform_away_from_walls'):
        raise ValueError(
            f'Objective placement option: {objective_placement} not implemented.'
            ' Please choose from center and uniform_away_from_walls.')

    # Every guard below is expressed through np.max(n_boxes), so n_boxes may
    # be array-like (presumably a range of box counts -- confirm against
    # Boxes). range() and the spoof wrapper need a plain int, so reduce once.
    max_boxes = int(np.max(n_boxes))
    has_boxes = max_boxes > 0

    grab_radius_multiplier = lock_grab_radius / box_size
    lock_radius_multiplier = lock_grab_radius / box_size

    env = Base(n_agents=n_agents, n_substeps=n_substeps, horizon=horizon,
               floor_size=floor_size, grid_size=grid_size,
               action_lims=action_lims, deterministic_mode=deterministic_mode)

    env.add_module(WallScenarios(n_agents=n_agents, grid_size=grid_size, door_size=2, scenario='empty',
                                 friction=other_friction))

    if objective_placement == 'center':
        objective_placement_fn = center_placement
    else:  # 'uniform_away_from_walls' (validated above)
        objective_placement_fn = uniform_placement_middle(0.7)

    env.add_module(Cylinders(1, diameter=objective_diameter, height=box_size,
                             make_static=True, placement_fn=objective_placement_fn))

    env.add_module(Agents(n_agents,
                          placement_fn=uniform_placement,
                          color=[np.array((66., 235., 244., 255.)) / 255] * n_agents,
                          friction=other_friction,
                          polar_obs=polar_obs))
    if has_boxes:
        env.add_module(Boxes(n_boxes=n_boxes, placement_fn=uniform_placement,
                             friction=box_floor_friction, polar_obs=polar_obs,
                             n_elongated_boxes=n_elongated_boxes,
                             boxid_obs=boxid_obs, boxsize_obs=boxsize_obs,
                             box_size=box_size,
                             box_only_z_rot=box_only_z_rot))
    if n_lidar_per_agent > 0 and visualize_lidar:
        env.add_module(LidarSites(n_agents=n_agents, n_lidar_per_agent=n_lidar_per_agent))

    env.add_module(AgentManipulation())
    if box_floor_friction is not None:
        env.add_module(FloorAttributes(friction=box_floor_friction))
    env.add_module(WorldConstants(gravity=gravity))
    env.reset()

    # Observation-key bookkeeping consumed by SplitObservations and
    # SelectKeysWrapper at the end of the wrapper chain.
    keys_self = ['agent_qpos_qvel','current_step']
    keys_mask_self = ['mask_aa_obs']
    keys_external = ['agent_qpos_qvel', 'vector_door_obs']
    keys_copy = ['you_lock', 'team_lock', 'ramp_you_lock', 'ramp_team_lock']
    keys_mask_external = []

    env = AddConstantObservationsWrapper(env, new_obs=additional_obs)
    keys_external += list(additional_obs)
    keys_mask_external += [ob for ob in additional_obs if 'mask' in ob]

    env = ShelterRewardWrapper(env, num_rays_per_side=num_rays_per_side,
                               reward_scale=shelter_reward_scale)
    env = SplitMultiAgentActions(env)

    if team_size_obs:
        keys_self += ['team_size']
    env = TeamMembership(env, np.zeros((n_agents,)))
    env = AgentAgentObsMask2D(env)
    env = DiscretizeActionWrapper(env, 'action_movement')
    if has_boxes:
        env = AgentGeomObsMask2D(env, pos_obs_key='box_pos', mask_obs_key='mask_ab_obs',
                                 geom_idxs_obs_key='box_geom_idxs')
        keys_external += ['mask_ab_obs', 'box_obs']
        keys_mask_external.append('mask_ab_obs')
    if lock_box and has_boxes:
        env = LockObjWrapper(env, body_names=[f'moveable_box{i}' for i in range(max_boxes)],
                             agent_idx_allowed_to_lock=np.arange(n_agents),
                             lock_type=lock_type,
                             radius_multiplier=lock_radius_multiplier,
                             obj_in_game_metadata_keys=["curr_n_boxes"],
                             agent_allowed_to_lock_keys=None if lock_out_of_vision else ["mask_ab_obs"])

    if grab_box and has_boxes:
        env = GrabObjWrapper(env, [f'moveable_box{i}' for i in range(max_boxes)],
                             radius_multiplier=grab_radius_multiplier,
                             grab_exclusive=grab_exclusive,
                             obj_in_game_metadata_keys=['curr_n_boxes'])

    if n_lidar_per_agent > 0:
        env = Lidar(env, n_lidar_per_agent=n_lidar_per_agent, visualize_lidar=visualize_lidar,
                    compress_lidar_scale=compress_lidar_scale)
        keys_copy += ['lidar']
        keys_external += ['lidar']
    env = TimeWrapper(env, horizon)
    env = SplitObservations(env, keys_self + keys_mask_self, keys_copy=keys_copy)
    if n_agents == 1:
        env = SpoofEntityWrapper(env, 2, ['agent_qpos_qvel'], ['mask_aa_obs'])
    env = SpoofEntityWrapper(env, max_boxes, ['box_obs', 'you_lock', 'team_lock', 'obj_lock'], ['mask_ab_obs'])
    keys_mask_external += ['mask_ab_obs_spoof']
    env = LockAllWrapper(env, remove_object_specific_lock=True)
    if not grab_out_of_vision and grab_box:
        env = MaskActionWrapper(env, 'action_pull', ['mask_ab_obs'])  # Can only pull if in vision
    if not grab_selective and grab_box:
        env = GrabClosestWrapper(env)
    env = DiscardMujocoExceptionEpisodes(env, n_agents)
    env = ConcatenateObsWrapper(env, {'agent_qpos_qvel': ['agent_qpos_qvel'],
                                      'box_obs': ['box_obs', 'you_lock', 'team_lock', 'obj_lock']})
    env = SelectKeysWrapper(env, keys_self=keys_self,
                            keys_external=keys_external,
                            keys_mask=keys_mask_self + keys_mask_external,
                            flatten=False)
    return env