def __init__(self, observation_space, action_space, hidden_spec, enable_obsnorm, varscope_name):
    EzPickle.__init__(self, observation_space, action_space, hidden_spec, enable_obsnorm,
                      varscope_name)
    self.hidden_spec = hidden_spec
    self._dist = Categorical(action_space.n)
    super(CategoricalMLPPolicy, self).__init__(observation_space, action_space, action_space.n,
                                               enable_obsnorm, varscope_name)
def __init__(self, env, scale_reward=1., enable_obsnorm=False, enable_rewnorm=False,
             obs_alpha=0.001, rew_alpha=0.001, eps=1e-8):
    EzPickle.__init__(self, env, scale_reward, enable_obsnorm, enable_rewnorm, obs_alpha,
                      rew_alpha, eps)
    self._unwrapped = env
    self._scale_reward = scale_reward
    self._enable_obsnorm = enable_obsnorm
    self._enable_rewnorm = enable_rewnorm
    self._obs_alpha = obs_alpha
    self._rew_alpha = rew_alpha
    self._eps = eps

    # Per-agent running statistics for observation/reward normalization
    self._flatobs_shape = [None for _ in env.agents]
    self._obs_mean = [None for _ in env.agents]
    self._obs_var = [None for _ in env.agents]
    self._rew_mean = [None for _ in env.agents]
    self._rew_var = [None for _ in env.agents]

    for agid, agent in enumerate(env.agents):
        if isinstance(agent.observation_space, spaces.Box):
            self._flatobs_shape[agid] = np.prod(agent.observation_space.shape)
        elif isinstance(agent.observation_space, spaces.Discrete):
            self._flatobs_shape[agid] = agent.observation_space.n
        self._obs_mean[agid] = np.zeros(self._flatobs_shape[agid])
        self._obs_var[agid] = np.ones(self._flatobs_shape[agid])
        self._rew_mean[agid] = 0.
        self._rew_var[agid] = 1.
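# A minimal, self-contained sketch (not the wrapper's actual code) of how per-agent running
# statistics like the ones above are typically used: an exponential moving average controlled
# by obs_alpha, followed by standardization with eps guarding the denominator. The helper name
# `_update_and_standardize` is hypothetical.
import numpy as np

def _update_and_standardize(flat_obs, mean, var, alpha, eps):
    flat_obs = np.asarray(flat_obs, dtype=np.float64).ravel()
    mean = (1. - alpha) * mean + alpha * flat_obs                   # EMA of the mean
    var = (1. - alpha) * var + alpha * np.square(flat_obs - mean)   # EMA of the variance
    standardized = (flat_obs - mean) / (np.sqrt(var) + eps)         # avoid division by zero
    return standardized, mean, var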
def __init__(self, n_legs=4, ts=0.02, integrator='RK4', leg_length=0.282,
             out_file="multi_ant.xml", base_file="ant_og.xml", reward_mech='local',
             pos_noise=1e-3, vel_noise=1e-3, force_noise=1e-3):
    EzPickle.__init__(self, n_legs, ts, integrator, leg_length, out_file, base_file,
                      reward_mech, pos_noise, vel_noise, force_noise)
    self.n_legs = n_legs
    self.ts = ts
    self.integrator = integrator
    self.leg_length = leg_length
    self.out_file = out_file
    self.base_file = base_file
    self._reward_mech = reward_mech
    self.pos_noise = pos_noise
    self.vel_noise = vel_noise
    self.force_noise = force_noise
    self.legs = None

    # Generate the multi-legged ant XML next to this file, then load it into MuJoCo
    self.out_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), self.out_file)
    self.base_file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), self.base_file)
    self.gen_xml(out_file=self.out_file_path, og_file=self.base_file_path)
    mujoco_env.MujocoEnv.__init__(self, self.out_file_path, 5)

    self.legs = [AntLeg(self.model, i, n_legs, pos_noise=pos_noise, vel_noise=vel_noise,
                        force_noise=force_noise) for i in range(self.n_legs)]
def __init__(self, n_walkers=2, position_noise=1e-3, angle_noise=1e-3, reward_mech='local',
             forward_reward=1.0, fall_reward=-100.0, drop_reward=-100.0,
             terminate_on_fall=True, one_hot=False, **kwargs):
    EzPickle.__init__(self, n_walkers, position_noise, angle_noise, reward_mech,
                      forward_reward, fall_reward, drop_reward, terminate_on_fall, one_hot,
                      **kwargs)
    self.n_walkers = n_walkers
    self.position_noise = position_noise
    self.angle_noise = angle_noise
    self._reward_mech = reward_mech
    self.forward_reward = forward_reward
    self.fall_reward = fall_reward
    self.drop_reward = drop_reward
    self.terminate_on_fall = terminate_on_fall
    self.one_hot = one_hot
    self.setup()

    # Expose one action/observation space per walker
    self.action_space, self.observation_space = [], []
    for a_i in range(self.n_walkers):
        self.action_space.append(self.agents[a_i].action_space)
        self.observation_space.append(self.agents[a_i].observation_space)
def __init__(self):
    num_agents = 1
    self.env_agents = [SimpleAgent() for _ in range(num_agents)]  # NEEDED

    # Internal
    self.n_agents = len(self.env_agents)

    EzPickle.__init__(self)
    self.seed()
    self.reset()
def __init__(self, n_pursuers1, n_pursuers2, n_evaders1, n_evaders2, n_coop=2, n_poison=10,
             radius=0.015, obstacle_radius=0.2, obstacle_loc=np.array([0.5, 0.5]),
             ev_speed=0.01, poison_speed=0.01, n_sensors=30, sensor_range=0.2,
             action_scale=0.01, poison_reward=-1., food_reward=1., encounter_reward=.05,
             control_penalty=-.5, collision_penalty=-1, reward_mech='local', addid=True,
             speed_features=True, **kwargs):
    EzPickle.__init__(self, n_pursuers1, n_pursuers2, n_evaders1, n_evaders2, n_coop, n_poison,
                      radius, obstacle_radius, obstacle_loc, ev_speed, poison_speed, n_sensors,
                      sensor_range, action_scale, poison_reward, food_reward, encounter_reward,
                      control_penalty, collision_penalty, reward_mech, addid, speed_features,
                      **kwargs)
    self.n_pursuers1 = n_pursuers1
    self.n_pursuers2 = n_pursuers2
    self.n_evaders1 = n_evaders1
    self.n_evaders2 = n_evaders2
    self.n_coop = n_coop
    self.n_poison = n_poison
    self.obstacle_radius = obstacle_radius
    self.obstacle_loc = obstacle_loc
    self.poison_speed = poison_speed
    self.radius = radius
    self.ev_speed = ev_speed
    self.n_sensors = n_sensors
    self.sensor_range1 = np.ones(self.n_pursuers1) * sensor_range
    self.sensor_range2 = np.ones(self.n_pursuers2) * sensor_range
    self.action_scale = action_scale
    self.poison_reward = poison_reward
    self.food_reward = food_reward
    self.control_penalty = control_penalty
    self.collision_penalty = collision_penalty
    self.encounter_reward = encounter_reward
    self.n_obstacles = 1
    self._reward_mech = reward_mech
    self._addid = addid
    self._speed_features = speed_features
    self.seed()

    self._pursuers1 = [
        Archea(npu + 1, self.radius, self.n_sensors, self.sensor_range1[npu],
               addid=self._addid, speed_features=self._speed_features)
        for npu in range(self.n_pursuers1)
    ]
    self._pursuers2 = [
        Archea(npu + 1, self.radius, self.n_sensors, self.sensor_range2[npu],
               addid=self._addid, speed_features=self._speed_features)
        for npu in range(self.n_pursuers2)
    ]
    self._evaders1 = [
        Archea(nev + 1, self.radius * 2, self.n_pursuers1, self.sensor_range1.mean() / 2)
        for nev in range(self.n_evaders1)
    ]
    self._evaders2 = [
        Archea(nev + 1, self.radius * 2, self.n_pursuers1, self.sensor_range1.mean() / 2)
        for nev in range(self.n_evaders2)
    ]
    self._poisons = [
        Archea(npo + 1, self.radius * 3 / 4, self.n_poison, 0)
        for npo in range(self.n_poison)
    ]
def __init__(self, n_walkers=2, position_noise=1e-3, angle_noise=1e-3, reward_mech='local',
             forward_reward=1.0, fall_reward=-100.0, drop_reward=-100.0,
             terminate_on_fall=True):
    EzPickle.__init__(self, n_walkers, position_noise, angle_noise, reward_mech,
                      forward_reward, fall_reward, drop_reward, terminate_on_fall)
    self.seed()
    self.viewer = None

    self.world = Box2D.b2World()
    self.terrain = None

    self.n_walkers = n_walkers
    init_x = TERRAIN_STEP * TERRAIN_STARTPAD / 2
    init_y = TERRAIN_HEIGHT + 2 * LEG_H
    self.start_x = [
        init_x + WALKER_SEPERATION * i * TERRAIN_STEP for i in xrange(self.n_walkers)
    ]
    self.walkers = [
        BipedalWalker(self.world, init_x=sx, init_y=init_y) for sx in self.start_x
    ]

    # The shared package scales with the number of walkers carrying it
    self.package_scale = n_walkers / 1.75
    self.package_length = PACKAGE_LENGTH / SCALE * self.package_scale

    self.total_agents = n_walkers

    self.prev_shaping = np.zeros(self.n_walkers)
    self.prev_package_shaping = 0.0

    self.position_noise = position_noise
    self.angle_noise = angle_noise
    self._reward_mech = reward_mech

    self.terrain_length = int(TERRAIN_LENGTH * n_walkers * 1 / 8.)

    self.forward_reward = forward_reward
    self.fall_reward = fall_reward
    self.drop_reward = drop_reward
    self.terminate_on_fall = terminate_on_fall

    self.reset()
def __init__(self):
    self.env_agents = [SimpleAgent() for _ in range(3)]  # NEEDED

    # Internal
    self.n_agents = len(self.env_agents)

    # Per-agent event clocks drawn from a Weibull(1.5) distribution
    self.time_to_event_generator = lambda: np.random.weibull(1.5, 1)[0]
    self.time_to_event = np.array([self.time_to_event_generator() for _ in self.env_agents])
    self.sojourn_time = np.array([0. for i in self.time_to_event])
    self.global_time = 0.

    EzPickle.__init__(self)
    self.seed()
    self.reset()
def __init__(self, observation_space, action_space, hidden_spec, enable_obsnorm, min_stdev,
             init_logstdev, varscope_name):
    EzPickle.__init__(self, observation_space, action_space, hidden_spec, enable_obsnorm,
                      min_stdev, init_logstdev, varscope_name)
    self.hidden_spec = hidden_spec
    self.min_stdev = min_stdev
    self.init_logstdev = init_logstdev
    self._dist = Gaussian(action_space.shape[0])
    super(GaussianMLPPolicy, self).__init__(
        observation_space,
        action_space,
        action_space.shape[0] * 2,  # Mean and diagonal stdev
        enable_obsnorm,
        varscope_name)
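# Sketch only (an assumption, not this policy's actual graph): the conventional way a
# 2 * action_dim network output is split into a mean and a diagonal standard deviation,
# with min_stdev acting as a floor. The helper name `_split_mean_std` is hypothetical.
import numpy as np

def _split_mean_std(net_output, min_stdev):
    mean, logstd = np.split(np.asarray(net_output), 2, axis=-1)  # first half: means, second half: log-stds
    std = np.maximum(np.exp(logstd), min_stdev)                  # enforce the minimum stdev
    return mean, std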
def __init__(self, n_good, n_hostages, n_bad, n_coop_save, n_coop_avoid, radius=0.015,
             key_loc=None, bad_speed=0.01, n_sensors=30, sensor_range=0.2, action_scale=0.01,
             save_reward=5., hit_reward=-1., encounter_reward=0.01, not_saved_reward=-3,
             bomb_reward=-5., bomb_radius=0.05, key_radius=0.0075, control_penalty=-.1,
             reward_mech='global', addid=True, **kwargs):
    """The environment is a square world with hostages held behind gates.

    One of the good agents has to find the key; only then can the gates be opened. Once the
    gates are open, the good agents need to reach the hostages to save them, while avoiding
    the bomb and the bad agents. Touching the bomb terminates the episode with a large
    negative reward.
    """
    EzPickle.__init__(self, n_good, n_hostages, n_bad, n_coop_save, n_coop_avoid, radius,
                      key_loc, bad_speed, n_sensors, sensor_range, action_scale, save_reward,
                      hit_reward, encounter_reward, not_saved_reward, bomb_reward, bomb_radius,
                      key_radius, control_penalty, reward_mech, addid, **kwargs)
    self.n_good = n_good
    self.n_hostages = n_hostages
    self.n_bad = n_bad
    self.n_coop_save = n_coop_save
    self.n_coop_avoid = n_coop_avoid
    self.radius = radius
    self.key_loc = key_loc
    self.key_radius = key_radius
    self.bad_speed = bad_speed
    self.n_sensors = n_sensors
    self.sensor_range = np.ones(self.n_good) * sensor_range if isinstance(
        sensor_range, float) else sensor_range
    self.action_scale = action_scale
    self.save_reward = save_reward
    self.hit_reward = hit_reward
    self.encounter_reward = encounter_reward
    self.not_saved_reward = not_saved_reward
    self.bomb_reward = bomb_reward
    self.bomb_radius = bomb_radius
    self.control_penalty = control_penalty
    self._reward_mech = reward_mech
    self._addid = addid
    self.seed()

    self._rescuers = [
        CircAgent(agid + 1, self.radius, self.n_sensors, self.sensor_range[agid],
                  addid=self._addid) for agid in range(self.n_good)
    ]
    self._criminals = [
        CircAgent(agid + 1, self.radius, self.n_sensors, self.sensor_range.mean())
        for agid in range(self.n_bad)
    ]
    self._hostages = [
        CircAgent(agid + 1, self.radius * 2, self.n_sensors, self.sensor_range.min())
        for agid in range(self.n_hostages)
    ]
def __init__(self, observation_space, action_space, hidden_spec, enable_obsnorm, min_stdev,
             init_logstdev, state_include_action, varscope_name):
    EzPickle.__init__(self, observation_space, action_space, hidden_spec, enable_obsnorm,
                      min_stdev, init_logstdev, state_include_action, varscope_name)
    self.hidden_spec = hidden_spec
    self.min_stdev = min_stdev
    self.init_logstdev = init_logstdev
    self.state_include_action = state_include_action
    # TODO add to stochastic policy
    self._dist = RecurrentGaussian(action_space.shape[0])
    self.prev_actions = None
    self.prev_hiddens = None
    super(GaussianGRUPolicy, self).__init__(
        observation_space,
        action_space,
        action_space.shape[0] * 2,  # Mean and diagonal stdev
        enable_obsnorm,
        varscope_name)
def __init__(self, n_pursuers, n_evaders, n_coop=2, n_poison=10, radius=0.015,
             obstacle_radius=0.2, obstacle_loc=np.array([0.5, 0.5]), ev_speed=0.01,
             poison_speed=0.01, n_sensors=30, sensor_range=0.2, action_scale=0.01,
             poison_reward=-1., food_reward=1., encounter_reward=.05, control_penalty=-.5,
             reward_mech='local', addid=True, speed_features=True, **kwargs):
    EzPickle.__init__(self, n_pursuers, n_evaders, n_coop, n_poison, radius, obstacle_radius,
                      obstacle_loc, ev_speed, poison_speed, n_sensors, sensor_range,
                      action_scale, poison_reward, food_reward, encounter_reward,
                      control_penalty, reward_mech, addid, speed_features, **kwargs)
    self.n_pursuers = n_pursuers
    self.n_evaders = n_evaders
    self.n_coop = n_coop
    self.n_poison = n_poison
    self.obstacle_radius = obstacle_radius
    self.obstacle_loc = obstacle_loc
    self.poison_speed = poison_speed
    self.radius = radius
    self.ev_speed = ev_speed
    self.n_sensors = n_sensors
    self.sensor_range = np.ones(self.n_pursuers) * sensor_range
    self.action_scale = action_scale
    self.poison_reward = poison_reward
    self.food_reward = food_reward
    self.control_penalty = control_penalty
    self.encounter_reward = encounter_reward
    self.n_obstacles = 1
    self._reward_mech = reward_mech
    self._addid = addid
    self._speed_features = speed_features
    self.seed()

    self._pursuers = [
        Archea(npu + 1, self.radius, self.n_sensors, self.sensor_range[npu],
               addid=self._addid, speed_features=self._speed_features)
        for npu in range(self.n_pursuers)
    ]
    self._evaders = [
        Archea(nev + 1, self.radius * 2, self.n_pursuers, self.sensor_range.mean() / 2)
        for nev in range(self.n_evaders)
    ]
    self._poisons = [
        Archea(npo + 1, self.radius * 3 / 4, self.n_poison, 0)
        for npo in range(self.n_poison)
    ]
def __init__(self):
    self.discount = CT_DISCOUNT_RATE
    num_row_col = 1
    self.n_agents = num_row_col**2
    self.max_stop_time = 100  # seconds
    self.min_stop_time = 2  # seconds

    # Specify connectivity as East to West across each row, North to South down each column
    self.connectivity = np.array(list(range(self.n_agents))).reshape(
        (num_row_col, num_row_col))

    # Assigned on reset()
    self.env_agents = [None for _ in range(self.n_agents)]  # NEEDED
    self.simpy_env = None
    self.agent_event_list = [None] * self.n_agents

    EzPickle.__init__(self)
    self.seed()
    self.reset()
def __init__(self, n_walkers=2, position_noise=1e-3, angle_noise=1e-3, reward_mech='local',
             forward_reward=1.0, fall_reward=-100.0, drop_reward=-100.0,
             terminate_on_fall=True, one_hot=False):
    EzPickle.__init__(self, n_walkers, position_noise, angle_noise, reward_mech,
                      forward_reward, fall_reward, drop_reward, terminate_on_fall, one_hot)
    self.n_walkers = n_walkers
    self.position_noise = position_noise
    self.angle_noise = angle_noise
    self._reward_mech = reward_mech
    self.forward_reward = forward_reward
    self.fall_reward = fall_reward
    self.drop_reward = drop_reward
    self.terminate_on_fall = terminate_on_fall
    self.one_hot = one_hot
    self.setup()
def __init__(self, continuous_action_space=True, n_agents=MIN_AGENTS, constant_n_agents=True,
             training_mode='circle', sensor_mode='closest', sensor_capacity=SENSOR_CAPACITY,
             max_time_steps=MAX_TIME_STEPS, one_hot=False, render_option=False,
             speed_noise=1e-3, position_noise=1e-3, angle_noise=1e-3, reward_mech='local',
             rew_arrival=15, rew_closing=2.5, rew_nmac=-15, rew_large_turnrate=-0.1,
             rew_large_acc=-1, pen_action_heavy=True, random_mode=True):
    EzPickle.__init__(self, continuous_action_space, n_agents, constant_n_agents,
                      training_mode, sensor_mode, sensor_capacity, max_time_steps, one_hot,
                      render_option, speed_noise, position_noise, angle_noise, reward_mech,
                      rew_arrival, rew_closing, rew_nmac, rew_large_turnrate, rew_large_acc,
                      pen_action_heavy, random_mode)
    self.t = 0
    self.aircraft = []
    self.n_agents = n_agents
    self.continuous_action_space = continuous_action_space
    self.constant_n_agents = constant_n_agents
    self.training_mode = training_mode
    self.sensor_mode = sensor_mode
    self.sensor_capacity = sensor_capacity
    self.max_time_steps = max_time_steps
    self.one_hot = one_hot
    self.render_option = render_option
    self.circle_radius = random.choice(range(MIN_CIRCLE_RADIUS, MAX_CIRCLE_RADIUS))

    # Observation noises:
    self.speed_noise = speed_noise
    self.position_noise = position_noise
    self.angle_noise = angle_noise

    # Reward settings:
    self._reward_mech = reward_mech
    self.rew_arrival = rew_arrival
    self.rew_closing = rew_closing
    self.rew_nmac = rew_nmac
    self.rew_large_turnrate = rew_large_turnrate
    self.rew_large_acc = rew_large_acc
    self.pen_action_heavy = pen_action_heavy
    self.random_mode = random_mode

    self.observation_space = spaces.Box(
        low=-1, high=1, shape=(OWN_OBS_DIM + PAIR_OBS_DIM * self.sensor_capacity,))
    if self.continuous_action_space:
        self.action_space = spaces.Box(low=-1, high=1, shape=(ACTION_DIM,))
    else:
        self.action_space = spaces.Discrete(DISC_ACTION_DIM)

    self.seed()
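# Illustrative sketch (an assumption, not the environment's actual sensor code): how a
# 'closest' sensor mode with a fixed sensor_capacity is commonly turned into the fixed-length
# observation declared above, zero-padding when fewer intruders are in range. The function
# name `build_observation` and its arguments are hypothetical; own_obs / pair observations
# mirror OWN_OBS_DIM and PAIR_OBS_DIM.
import numpy as np

def build_observation(own_obs, pair_obs_list, sensor_capacity, pair_obs_dim):
    # keep the closest `sensor_capacity` pair observations (assumed pre-sorted by range)
    pairs = [np.asarray(p, dtype=np.float64) for p in pair_obs_list][:sensor_capacity]
    # zero-pad so the observation length is constant regardless of traffic density
    while len(pairs) < sensor_capacity:
        pairs.append(np.zeros(pair_obs_dim))
    return np.concatenate([np.asarray(own_obs, dtype=np.float64)] + pairs)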
def __init__(self, map_pool, **kwargs):
    # kwargs: optional settings popped off one key at a time;
    # if a key is present its value is used, otherwise the default applies
    EzPickle.__init__(self, map_pool, **kwargs)

    # Initialize map, observation, and reward settings
    self.sample_maps = kwargs.pop('sample_maps', False)
    self.map_pool = map_pool
    map_matrix = map_pool[0]
    self.map_matrix = map_matrix
    xs, ys = self.map_matrix.shape
    self.xs = xs
    self.ys = ys
    self._reward_mech = kwargs.pop('reward_mech', 'global')
    self.obs_range = kwargs.pop('obs_range', 3)  # agents can see 3 grid cells around them by default
    #assert self.obs_range % 2 != 0, "obs_range should be odd"
    self.obs_offset = int((self.obs_range - 1) / 2)
    self.flatten = kwargs.pop('flatten', True)

    # Initialize agents
    self.n_surveillances = kwargs.pop('n_surveillances', 1)
    self.n_snipers = kwargs.pop('n_snipers', 1)
    self.n_targets = kwargs.pop('n_targets', 1)
    # self.agents = list of single-agent entities that define how each agent moves given inputs;
    # agent_utils.create_agents is the helper for creating that list
    self.surveillances = agent_utils.create_agents(self.n_surveillances, map_matrix,
                                                   self.obs_range, flatten=self.flatten)
    self.snipers = agent_utils.create_agents(self.n_snipers, map_matrix, self.obs_range,
                                             flatten=self.flatten)
    self.targets = agent_utils.create_agents(self.n_targets, map_matrix, self.obs_range,
                                             flatten=self.flatten)

    self.surveillance_layer = AgentLayer(xs, ys, self.surveillances)
    self.sniper_layer = AgentLayer(xs, ys, self.snipers)
    self.target_layer = AgentLayer(xs, ys, self.targets)

    n_act = self.sniper_layer.get_nactions(0)
    self.sniper_controller = kwargs.pop('sniper_controller', RandomPolicy(n_act))
    self.target_controller = kwargs.pop('target_controller', RandomPolicy(n_act))

    self.sniper_r = kwargs.pop('term_sniper', -1.0)
    self.target_r = kwargs.pop('term_evade', 0.1)
    self.urgency_reward = kwargs.pop('urgency_reward', 0.0)

    # Initialize remainder of state
    self.layer_norm = kwargs.pop('layer_norm', 10)
    self.current_agent_layer = np.zeros((xs, ys), dtype=np.int32)
    self.include_id = kwargs.pop('include_id', True)

    self.surveillance_actions = np.zeros(n_act, dtype=np.int32)
    self.sniper_actions = np.zeros(n_act, dtype=np.int32)
    self.target_actions = np.zeros(n_act, dtype=np.int32)

    # Set up the action and observation spaces
    self.low = np.array([0.0 for i in xrange(4 * self.obs_range**2)])
    self.high = np.array([1.0 for i in xrange(4 * self.obs_range**2)])
    if self.include_id:
        self.low = np.append(self.low, 0.0)
        self.high = np.append(self.high, 1.0)
    self.action_space = spaces.Discrete(n_act)
    if self.flatten:
        self.observation_space = spaces.Box(self.low, self.high)
    else:
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                            shape=(5, self.obs_range, self.obs_range))
    self.local_obs = np.zeros(
        (self.n_surveillances, 5, self.obs_range, self.obs_range))  # n_surveillances x 5 x obs_range x obs_range
    self.act_dims = [n_act for i in xrange(self.n_surveillances)]

    # More state setup
    self.initial_config = kwargs.pop('initial_config', {})
    self.constraint_window = kwargs.pop('constraint_window', 1.0)
    self.curriculum_remove_every = kwargs.pop('curriculum_remove_every', 500)
    self.curriculum_constrain_rate = kwargs.pop('curriculum_constrain_rate', 0.0)
    self.curriculum_turn_off_shaping = kwargs.pop('curriculum_turn_off_shaping', np.inf)
    self.surround = kwargs.pop('surround', True)
    self.surround_mask = np.array([[-1, 0], [1, 0], [0, 1], [0, -1]])

    # Layers of state:
    #   layer 1: buildings
    #   layer 2: snipers
    #   layer 3: targets
    #   layer 4: surveillance
    #   layer 5: irrelevant
    self.model_state = np.zeros((5, ) + map_matrix.shape, dtype=np.float32)

##################################################################
# The functions below are the interface with MultiAgentSimulator #
##################################################################

@property
def agents(self):
    return self.surveillances
def __init__(self, radius=0.015, obstacle_radius=0.2, obstacle_loc=np.array([0.5, 0.5]),
             ev_speed=0.01, n_sensors=20, sensor_range=2, action_scale=0.01, food_reward=10,
             encounter_reward=.05, control_penalty=-0.1, evader_params=np.array([0.1, 0.05]),
             speed_features=True, is_observability_full=False, max_velocity_pursuer=0.05,
             meta_learning=False, **kwargs):
    EzPickle.__init__(self, radius, obstacle_radius, obstacle_loc, ev_speed, n_sensors,
                      sensor_range, action_scale, food_reward, encounter_reward,
                      control_penalty, evader_params, speed_features, is_observability_full,
                      max_velocity_pursuer, meta_learning, **kwargs)
    self.obstacle_radius = obstacle_radius
    self.obstacle_loc = obstacle_loc
    self.ev_speed = 0.05 * (1 - evader_params[0])
    self.n_sensors = n_sensors
    self.sensor_range = np.ones(1) * sensor_range
    self.radius = radius
    self.action_scale = action_scale
    self.food_reward = food_reward
    self.encounter_reward = encounter_reward
    self.control_penalty = control_penalty
    self.n_obstacles = 1
    self._speed_features = speed_features
    self.seed()

    self._pursuer = Archea(1, self.radius, self.n_sensors, self.sensor_range,
                           is_observability_full, speed_features=True)
    self._evader = Archea(1, self.radius, self.n_sensors, self.sensor_range,
                          is_observability_full, speed_features=True)
    self._food = Archea(1, self.radius * 0.75, self.n_sensors, self.sensor_range,
                        is_observability_full, speed_features=True)
    self._pursuers = [self._pursuer]

    self.evader_params = evader_params
    self._meta_learning = meta_learning
    self.max_velocity_pursuer = max_velocity_pursuer
    if self._meta_learning:
        # Resample the evader's first parameter from a truncated normal, rejecting zero
        self.evader_params[0] = truncnorm.rvs(-2, 2, loc=0.5, scale=0.25)
        while self.evader_params[0] == 0:
            self.evader_params[0] = truncnorm.rvs(-2, 2, loc=0.5, scale=0.25)
    self.is_observability_full = is_observability_full
    self._evader_move = False
def __init__(self, map_pool, **kwargs):
    """In evade-pursuit, a set of pursuers must 'tag' a set of evaders.

    Required arguments:
    - map_matrix: the map on which agents interact

    Optional arguments:
    - ally_layer: list of pursuers
    - opponent_layer: list of evaders
    - ally_controller: stationary policy of the ally pursuers
    - opponent_controller: stationary policy of the opponent evaders
    - catchr: reward for 'tagging' a single evader
    - caughtr: reward for getting 'tagged' by a pursuer
    - train_pursuit: flag indicating whether we are simulating pursuers or evaders
    - initial_config: dictionary of the form
        initial_config['allies']: the initial ally configuration (matrix)
        initial_config['opponents']: the initial opponent configuration (matrix)
    """
    EzPickle.__init__(self, map_pool, **kwargs)
    self.sample_maps = kwargs.pop('sample_maps', False)
    self.map_pool = map_pool
    map_matrix = map_pool
    self.map_matrix = map_matrix
    zs, xs, ys = self.map_matrix.shape
    self.xs = xs
    self.ys = ys
    self.zs = zs
    self._reward_mech = kwargs.pop('reward_mech', 'global')

    self.n_evaders = kwargs.pop('n_evaders', 1)
    self.n_pursuers = kwargs.pop('n_pursuers', 1)

    self.obs_range = kwargs.pop('obs_range', 3)  # agents can see 3 grid cells around them by default
    #assert self.obs_range % 2 != 0, "obs_range should be odd"
    self.obs_offset = int((self.obs_range - 1) / 2)
    self.flatten = kwargs.pop('flatten', True)

    self.pursuers = agent_utils.create_agents(self.n_pursuers, map_matrix, self.obs_range,
                                              flatten=self.flatten)
    self.evaders = agent_utils.create_agents(self.n_evaders, map_matrix, self.obs_range,
                                             flatten=self.flatten)
    self.pursuer_layer = kwargs.pop('ally_layer', AgentLayer(xs, ys, zs, self.pursuers))
    self.evader_layer = kwargs.pop('opponent_layer', AgentLayer(xs, ys, zs, self.evaders))

    self.layer_norm = kwargs.pop('layer_norm', 10)
    self.n_catch = kwargs.pop('n_catch', 2)
    self.random_opponents = kwargs.pop('random_opponents', False)
    self.max_opponents = kwargs.pop('max_opponents', 10)

    n_act_purs = self.pursuer_layer.get_nactions(0)
    n_act_ev = self.evader_layer.get_nactions(0)
    self.evader_controller = kwargs.pop('evader_controller', RandomPolicy(n_act_purs))
    self.pursuer_controller = kwargs.pop('pursuer_controller', RandomPolicy(n_act_ev))

    self.current_agent_layer = np.zeros((zs, xs, ys), dtype=np.int32)

    self.catchr = kwargs.pop('catchr', 0.01)
    self.caughtr = kwargs.pop('caughtr', -0.01)
    self.term_pursuit = kwargs.pop('term_pursuit', 5.0)
    self.term_evade = kwargs.pop('term_evade', -5.0)
    self.urgency_reward = kwargs.pop('urgency_reward', 0.0)

    self.include_id = kwargs.pop('include_id', True)

    self.ally_actions = np.zeros(n_act_purs, dtype=np.int32)
    self.opponent_actions = np.zeros(n_act_ev, dtype=np.int32)

    self.train_pursuit = kwargs.pop('train_pursuit', True)
    if self.train_pursuit:
        self.low = np.array([0.0 for i in range(3 * self.obs_range**2)])
        self.high = np.array([1.0 for i in range(3 * self.obs_range**2)])
        if self.include_id:
            self.low = np.append(self.low, 0.0)
            self.high = np.append(self.high, 1.0)
        self.action_space = spaces.Discrete(n_act_purs)
        if self.flatten:
            self.observation_space = spaces.Box(self.low, self.high)
        else:
            self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                                shape=(4, self.obs_range, self.obs_range))
        self.local_obs = np.zeros(
            (self.n_pursuers, 4, self.obs_range, self.obs_range))  # n_pursuers x 4 x obs_range x obs_range
        self.act_dims = [n_act_purs for i in range(self.n_pursuers)]
    else:
        self.low = np.array([0.0 for i in range(3 * self.obs_range**2)])
        self.high = np.array([1.0 for i in range(3 * self.obs_range**2)])
        if self.include_id:
            self.low = np.append(self.low, 0.0)
            self.high = np.append(self.high, 1.0)
        self.action_space = spaces.Discrete(n_act_ev)
        if self.flatten:
            self.observation_space = spaces.Box(self.low, self.high)
        else:
            self.observation_space = spaces.Box(low=-np.inf, high=np.inf,
                                                shape=(4, self.obs_range, self.obs_range))
        self.local_obs = np.zeros(
            (self.n_evaders, 4, self.obs_range, self.obs_range))  # n_evaders x 4 x obs_range x obs_range
        self.act_dims = [n_act_purs for i in range(self.n_evaders)]

    self.pursuers_gone = np.array([False for i in range(self.n_pursuers)])
    self.evaders_gone = np.array([False for i in range(self.n_evaders)])

    self.initial_config = kwargs.pop('initial_config', {})

    self.surround = kwargs.pop('surround', True)
    self.constraint_window = kwargs.pop('constraint_window', 1.0)
    self.curriculum_remove_every = kwargs.pop('curriculum_remove_every', 500)
    self.curriculum_constrain_rate = kwargs.pop('curriculum_constrain_rate', 0.0)
    self.curriculum_turn_off_shaping = kwargs.pop('curriculum_turn_off_shaping', np.inf)

    self.surround_mask = np.array([[-1, 0, 0], [1, 0, 0], [0, 1, 0], [0, -1, 0]])

    self.model_state = np.zeros((4,) + map_matrix.shape, dtype=np.float32)
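# Hedged sketch of what a surround_mask like the one above is typically used for: checking
# whether the four lateral neighbour cells of an evader are all occupied by pursuers. The
# helper name `is_surrounded` and the position representation are assumptions for
# illustration, not the environment's actual capture logic.
import numpy as np

SURROUND_MASK = np.array([[-1, 0, 0], [1, 0, 0], [0, 1, 0], [0, -1, 0]])

def is_surrounded(evader_pos, pursuer_positions):
    pursuer_cells = {tuple(int(x) for x in p) for p in pursuer_positions}
    neighbours = np.asarray(evader_pos) + SURROUND_MASK   # the four lateral neighbour cells
    return all(tuple(int(x) for x in n) in pursuer_cells for n in neighbours)

# Example: an evader at (2, 3, 0) with pursuers on all four sides is surrounded.
# is_surrounded([2, 3, 0], [[1, 3, 0], [3, 3, 0], [2, 4, 0], [2, 2, 0]])  -> True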
def __init__(self, n_pursuers, n_evaders, n_coop=2, n_poison=10, radius=0.015,
             obstacle_radius=0.2, obstacle_loc=np.array([0.5, 0.5]), ev_speed=0.01,
             poison_speed=0.01, n_sensors=30, sensor_range=0.2, action_scale=0.01,
             poison_reward=-1., food_reward=1., encounter_reward=.05, control_penalty=-.5,
             reward_mech='local', addid=True, speed_features=True, **kwargs):
    EzPickle.__init__(self, n_pursuers, n_evaders, n_coop, n_poison, radius, obstacle_radius,
                      obstacle_loc, ev_speed, poison_speed, n_sensors, sensor_range,
                      action_scale, poison_reward, food_reward, encounter_reward,
                      control_penalty, reward_mech, addid, speed_features, **kwargs)
    self.n_pursuers = n_pursuers
    self.n_evaders = n_evaders
    self.n_coop = n_coop
    self.n_poison = n_poison
    self.obstacle_radius = obstacle_radius
    self.obstacle_loc = obstacle_loc
    self.poison_speed = poison_speed
    self.radius = radius
    self.ev_speed = ev_speed
    self.n_sensors = n_sensors
    self.sensor_range = np.ones(self.n_pursuers) * sensor_range
    self.action_scale = action_scale
    self.poison_reward = poison_reward
    self.food_reward = food_reward
    self.control_penalty = control_penalty
    self.encounter_reward = encounter_reward
    self.n_obstacles = 1
    self._reward_mech = reward_mech
    self._addid = addid
    self._speed_features = speed_features

    self.seed()
    if kwargs.get('seed') is not None:
        self.seed(kwargs['seed'])

    self._pursuers = [
        Archea(npu + 1, self.radius * 1., self.n_sensors, self.sensor_range[npu],
               addid=self._addid, speed_features=self._speed_features, ally=self.n_pursuers)
        for npu in range(self.n_pursuers)
    ]
    self._evaders = [
        Archea(nev + 1, self.radius * 1., self.n_pursuers, self.sensor_range.mean() / 2)
        for nev in range(self.n_evaders)
    ]
    self._poisons = [
        Archea(npo + 1, self.radius * 1., self.n_poison, 0)
        for npo in range(self.n_poison)
    ]

    self.action_space, self.observation_space = [], []
    for a_i in range(self.n_pursuers):
        self.action_space.append(self.agents[a_i].action_space)
        self.observation_space.append(self.agents[a_i].observation_space)
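# Minimal, self-contained illustration of the per-agent space lists built above: each agent
# gets its own space, and a joint action is one sample per agent. Uses gym.spaces directly;
# the agent count and action shape here are illustrative, not the environment's real ones.
from gym import spaces

action_space = [spaces.Box(low=-1.0, high=1.0, shape=(2,)) for _ in range(3)]  # 3 hypothetical agents
joint_action = [sp.sample() for sp in action_space]                            # one action per agent
assert len(joint_action) == 3 and all(a.shape == (2,) for a in joint_action)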