def spawn_resource(self):
    """
    Spawn a new resource in the source area if it is possible to do so.

    Picks random positions until a free tile is found, registers the new
    resource in the map/position/transform bookkeeping lists and (best
    effort) adds its geometry to the viewer.

    :return: (x, y) coordinate of the new resource if successful, None otherwise
    """
    # If there is no space to spawn new resources, don't spawn
    if self.source_is_full():
        return None

    while True:
        x, y = self.generate_resource_position()

        # The tile must be free on the map and not already claimed by
        # another resource's recorded position.
        if self.resource_map[y][x] == 0 and (x, y) not in self.resource_positions:
            self.latest_resource_id += 1
            self.resource_map[y][x] = self.latest_resource_id
            self.resource_positions.append((x, y))

            # Rendering is optional (e.g. headless machines): keep the
            # simulation state consistent even when it is unavailable.
            # Was a bare `except:`, which also swallowed KeyboardInterrupt.
            try:
                self.resource_transforms.append(rendering.Transform())
            except Exception:
                pass

            self.resource_carried_by.append([])
            self.current_num_resources += 1

            try:
                self.add_resource_to_rendering(self.latest_resource_id)
            except Exception:
                pass

            return x, y
def add_resource_to_rendering(self, resource_id):
    """
    Build the circle geometry for one resource and register it with the
    viewer, if a viewer exists.

    :param resource_id: index into self.resource_transforms for this resource
    """
    radius = self.resource_width / 2 * self.scale
    geom = rendering.make_circle(radius)

    red, green, blue = self.resource_colour
    geom.set_color(red, green, blue)

    # Base translation plus the per-resource transform that render() moves.
    geom.add_attr(rendering.Transform(translation=(0, 0)))
    geom.add_attr(self.resource_transforms[resource_id])

    if self.viewer is not None:
        self.viewer.add_geom(geom)
def draw_arena_segment(self, top, bottom, left, right, rgb_tuple):
    """
    Helper function that creates the geometry for a rectangular segment of
    the arena. Intended to be used by the viewer.

    :param top: upper y bound in arena coordinates
    :param bottom: lower y bound in arena coordinates
    :param left: left x bound in arena coordinates
    :param right: right x bound in arena coordinates
    :param rgb_tuple: (r, g, b) colour for the segment
    :return: A FilledPolygon object that can be added to the viewer using add_geom
    """
    # Shift (possibly negative) arena coordinates into screen space,
    # then scale to pixels.
    l = (left + self.x_shift) * self.scale
    r = (right + self.x_shift) * self.scale
    t = (top + self.y_shift) * self.scale
    b = (bottom + self.y_shift) * self.scale

    segment = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
    segment.add_attr(rendering.Transform(translation=(0, 0)))
    segment.add_attr(rendering.Transform())
    segment.set_color(rgb_tuple[0], rgb_tuple[1], rgb_tuple[2])
    return segment
def draw_arena_segment(self, top, bottom, rgb_tuple):
    """
    Helper function that creates the geometry for a horizontal band of the
    arena spanning its full width. Intended to be used by the viewer.

    :param top: upper y bound in arena coordinates
    :param bottom: lower y bound in arena coordinates
    :param rgb_tuple: (r, g, b) colour for the segment
    :return: A FilledPolygon object that can be added to the viewer using add_geom
    """
    # The band always spans the whole arena width; only top/bottom vary.
    l = self.arena_constraints["x_min"] * self.scale
    r = self.arena_constraints["x_max"] * self.scale
    t = top * self.scale
    b = bottom * self.scale

    segment = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
    segment.add_attr(rendering.Transform(translation=(0, 0)))
    segment.add_attr(rendering.Transform())
    segment.set_color(rgb_tuple[0], rgb_tuple[1], rgb_tuple[2])
    return segment
def render(self, mode='human'):
    """
    Renders the environment, placing all agents in appropriate positions

    On the first call the viewer and all geometry (halls, start zone, grid,
    agents, obstacles) are created; later calls only move the existing
    transforms to the current agent/obstacle positions.

    :param mode: 'human' for a window, 'rgb_array' to get back a pixel array
    :return: result of viewer.render (an RGB array when mode == 'rgb_array')
    """
    screen_width = self.total_width * self.scale
    screen_height = self.total_height * self.scale

    if self.viewer is None:
        self.viewer = rendering.Viewer(screen_width, screen_height)

        # Draw up hall
        up_hall = self.draw_arena_segment(
            self.arena_constraints["y_max"],
            self.arena_constraints["y_max"] - self.hall_size,
            self.arena_constraints["x_min"],
            self.arena_constraints["x_min"] + self.start_zone_size,
            self.hall_colour)
        self.viewer.add_geom(up_hall)

        # Draw down hall
        down_hall = self.draw_arena_segment(
            self.arena_constraints["y_max"] - self.hall_size - self.start_zone_size,
            self.arena_constraints["y_min"],
            self.arena_constraints["x_min"],
            self.arena_constraints["x_min"] + self.start_zone_size,
            self.hall_colour)
        self.viewer.add_geom(down_hall)

        # Draw right hall
        right_hall = self.draw_arena_segment(
            self.arena_constraints["y_min"] + self.hall_size + self.start_zone_size,
            self.arena_constraints["y_min"] + self.hall_size,
            self.arena_constraints["x_min"] + self.start_zone_size,
            self.arena_constraints["x_max"],
            self.hall_colour)
        self.viewer.add_geom(right_hall)

        # Draw start zone (the junction square of the T-maze)
        start_zone = self.draw_arena_segment(
            self.arena_constraints["y_min"] + self.hall_size + self.start_zone_size,
            self.arena_constraints["y_min"] + self.hall_size,
            self.arena_constraints["x_min"],
            self.arena_constraints["x_min"] + self.start_zone_size,
            self.start_zone_colour)
        self.viewer.add_geom(start_zone)

        # Draw grid
        grid_lines = self.draw_grid()
        for line in grid_lines:
            self.viewer.add_geom(line)

        # Draw agent(s) — one circle per agent, moved via agent_transforms
        for i in range(self.num_agents):
            agent = rendering.make_circle(self.agent_width / 2 * self.scale)
            agent.set_color(self.agent_colour[0], self.agent_colour[1],
                            self.agent_colour[2])
            agent.add_attr(rendering.Transform(translation=(0, 0)))
            agent.add_attr(self.agent_transforms[i])
            self.viewer.add_geom(agent)

        # Draw obstacles — one circle per obstacle coordinate
        for i in range(len(self.obstacle_coordinates)):
            obstacle = rendering.make_circle(self.obstacle_width / 2 * self.scale)
            obstacle.set_color(self.obstacle_colour[0], self.obstacle_colour[1],
                               self.obstacle_colour[2])
            obstacle.add_attr(rendering.Transform(translation=(0, 0)))
            obstacle.add_attr(self.obstacle_transforms[i])
            self.viewer.add_geom(obstacle)

    # Set position of agent(s). The +0.5 centres each circle on its tile;
    # subtracting x_min/y_min maps arena coordinates to screen coordinates.
    for i in range(self.num_agents):
        self.agent_transforms[i].set_translation(
            (self.agent_positions[i][0] - self.arena_constraints["x_min"] + 0.5) * self.scale,
            (self.agent_positions[i][1] - self.arena_constraints["y_min"] + 0.5) * self.scale)

    # Set position of obstacle(s)
    # NOTE(review): iteration order of obstacle_coordinates must match the
    # order the obstacle geoms were created in above — confirm it is an
    # insertion-ordered dict.
    for i, key in enumerate(self.obstacle_coordinates):
        self.obstacle_transforms[i].set_translation(
            (key[0] - self.arena_constraints["x_min"] + 0.5) * self.scale,
            (key[1] - self.arena_constraints["y_min"] + 0.5) * self.scale)

    return self.viewer.render(return_rgb_array=mode == 'rgb_array')
def __init__(self, parameter_filename=None):
    """
    Initialise the T-maze environment from a JSON parameter file.

    :param parameter_filename: path to the JSON file with the environment
        parameters (read from ["environment"]["tmaze"])
    :raises RuntimeError: if no parameter file is given, or if more than
        one episode is requested
    """
    if parameter_filename is None:
        raise RuntimeError(
            "No parameter file specified for the environment")

    parameter_dictionary = json.loads(open(parameter_filename).read())

    # self.seed_value = parameter_dictionary['general']['seed']
    self.seed_value = parameter_dictionary['environment']['tmaze'][
        'env_seed']
    self.np_random = np.random.RandomState(self.seed_value)

    # Environment dimensions
    self.hall_size = parameter_dictionary["environment"]["tmaze"][
        "hall_size"]
    self.start_zone_size = parameter_dictionary["environment"]["tmaze"][
        "start_zone_size"]

    # Odd-sized start zones need one extra tile so the zone stays centred
    # around the origin when split with integer division below.
    if self.start_zone_size % 2 == 0:
        self.offset = 0
    else:
        self.offset = 1

    # Arena coordinates are centred on the start zone, so x_min/y_min can
    # be negative; x_shift/y_shift below map them back to screen space.
    self.arena_constraints = {
        "x_min": -(self.start_zone_size // 2),
        "x_max": (self.start_zone_size // 2) + self.offset + self.hall_size,
        "y_min": -(self.start_zone_size // 2) - self.hall_size,
        "y_max": (self.start_zone_size // 2) + self.offset + self.hall_size
    }

    # Per-hall bounding boxes used when sampling obstacle positions
    self.obstacle_generation_constraints = {
        "UP": {
            "x_min": self.arena_constraints["x_min"],
            "x_max": self.arena_constraints["x_min"] + self.start_zone_size,
            "y_min": self.arena_constraints["y_min"] + self.hall_size + self.start_zone_size,
            "y_max": self.arena_constraints["y_max"]
        },
        "DOWN": {
            "x_min": self.arena_constraints["x_min"],
            "x_max": self.arena_constraints["x_min"] + self.start_zone_size,
            "y_min": self.arena_constraints["y_min"],
            "y_max": self.arena_constraints["y_min"] + self.hall_size
        },
        "RIGHT": {
            "x_min": self.arena_constraints["x_min"] + self.start_zone_size,
            "x_max": self.arena_constraints["x_max"],
            "y_min": self.arena_constraints["y_min"] + self.hall_size,
            "y_max": self.arena_constraints["y_min"] + self.hall_size + self.start_zone_size
        }
    }

    self.total_width = self.start_zone_size + self.hall_size
    self.total_height = self.start_zone_size + self.hall_size + self.hall_size

    # Offsets that map (possibly negative) arena coordinates onto
    # non-negative screen coordinates.
    self.x_shift = abs(self.arena_constraints["x_min"])
    self.y_shift = abs(self.arena_constraints["y_min"])

    self.num_obstacles_per_hall = parameter_dictionary["environment"][
        "tmaze"]["num_obstacles_per_hall"]
    self.obstacle_coordinates = {}
    self.place_obstacles()

    # Constants and variables
    self.agent_width = 0.8
    self.obstacle_width = 1.0
    self.num_agents = parameter_dictionary["environment"]["tmaze"][
        "num_agents"]
    self.num_episodes = parameter_dictionary["environment"]["tmaze"][
        "num_episodes"]
    self.episode_length = parameter_dictionary["environment"]["tmaze"][
        "episode_length"]
    self.reward_structure = parameter_dictionary["environment"]["tmaze"][
        "reward_structure"]

    if self.num_episodes > 1:
        raise RuntimeError(
            "All episodes are identical in TMaze. Modify the reset function to add this functionality"
        )

    # NOTE(review): goal coordinates are only defined for the "sparse"
    # reward structure; any other value leaves top_goal/bottom_goal/
    # right_goal unset — confirm that is intended.
    if self.reward_structure == "sparse":
        self.top_goal = self.arena_constraints[
            "y_max"] - self.hall_size // 2
        self.bottom_goal = self.arena_constraints[
            "y_min"] + self.hall_size // 2
        self.right_goal = self.arena_constraints[
            "x_max"] - self.hall_size // 2

    self.specialised_actions = 0
    self.total_rewarded_actions = 0

    # Novelty constants
    self.bc_measure = parameter_dictionary['environment']['tmaze'][
        'bc_measure']
    self.avg_pos_for_agent = [[0, 0] for _ in range(self.num_agents)]

    # Rendering constants
    self.scale = 40
    self.start_zone_colour = [0.5, 0.5, 0.5]
    self.hall_colour = [0.25, 0.5, 0.5]
    self.agent_colour = [0, 0, 0.25]
    self.obstacle_colour = [0.0, 0.0, 0.0]

    # Rendering variables — presumably the try/except guards against the
    # rendering module being unavailable (e.g. headless run); TODO confirm.
    self.viewer = None
    self.agent_transforms = None
    self.obstacle_transforms = None
    # self.resource_transforms = None

    try:
        self.agent_transforms = [
            rendering.Transform() for _ in range(self.num_agents)
        ]
        self.obstacle_transforms = [
            rendering.Transform()
            for _ in range(self.num_obstacles_per_hall * 3)
        ]
        # self.resource_transforms = [rendering.Transform() for i in range(self.default_num_resources)]
    except:
        pass

    self.agent_positions = self.generate_agent_positions()

    # Step variables
    self.behaviour_map = [self.up, self.down, self.right, self.left]
    self.action_name = ["UP", "DOWN", "RIGHT", "LEFT"]

    # Observation space
    # Agent's x-coordinate and y-coordinate
    self.observation_space_size = 2

    # Action space
    # 0- Forward, 1- Backward, 2- Right, 3- Left
    self.action_space_size = 4
def render(self, mode='human'):
    """
    Renders the environment, placing all agents and resources in appropriate positions

    On the first call the viewer and all geometry (nest, cache, slope,
    source bands, grid, agents, the default resources) are created; later
    calls only move the existing transforms.

    :param mode: 'human' for a window, 'rgb_array' to get back a pixel array
    :return: result of viewer.render (an RGB array when mode == 'rgb_array')
    """
    screen_width = self.arena_constraints["x_max"] * self.scale
    screen_height = self.arena_constraints["y_max"] * self.scale

    if self.viewer is None:
        self.viewer = rendering.Viewer(screen_width, screen_height)

        # Draw nest
        nest = self.draw_arena_segment(self.nest_size, self.nest_start,
                                       self.nest_colour)
        self.viewer.add_geom(nest)

        # Draw cache
        cache = self.draw_arena_segment(self.cache_start + self.cache_size,
                                        self.cache_start, self.cache_colour)
        self.viewer.add_geom(cache)

        # Draw slope
        slope = self.draw_arena_segment(self.slope_start + self.slope_size,
                                        self.slope_start, self.slope_colour)
        self.viewer.add_geom(slope)

        # Draw source
        source = self.draw_arena_segment(self.source_start + self.source_size,
                                         self.source_start, self.source_colour)
        self.viewer.add_geom(source)

        # Draw grid
        grid_lines = self.draw_grid()
        for line in grid_lines:
            self.viewer.add_geom(line)

        # Draw agent(s)
        for i in range(self.num_agents):
            agent = rendering.make_circle(self.agent_width / 2 * self.scale)
            agent.set_color(self.agent_colour[0], self.agent_colour[1],
                            self.agent_colour[2])
            agent.add_attr(rendering.Transform(translation=(0, 0)))
            agent.add_attr(self.agent_transforms[i])
            self.viewer.add_geom(agent)

        # Draw resource(s) — only the default resources are drawn here;
        # presumably resources spawned later get their geometry via
        # add_resource_to_rendering (TODO confirm).
        for i in range(self.default_num_resources):
            resource = rendering.make_circle(self.resource_width / 2 * self.scale)
            resource.set_color(self.resource_colour[0], self.resource_colour[1],
                               self.resource_colour[2])
            resource.add_attr(rendering.Transform(translation=(0, 0)))
            resource.add_attr(self.resource_transforms[i])
            self.viewer.add_geom(resource)

    # Set position of agent(s). The +0.5 centres each circle on its tile.
    for i in range(self.num_agents):
        self.agent_transforms[i].set_translation(
            (self.agent_positions[i][0] - self.arena_constraints["x_min"] + 0.5) * self.scale,
            (self.agent_positions[i][1] - self.arena_constraints["y_min"] + 0.5) * self.scale)

    # Set position of resource(s)
    for i in range(len(self.resource_positions)):
        self.resource_transforms[i].set_translation(
            (self.resource_positions[i][0] - self.arena_constraints["x_min"] + 0.5) * self.scale,
            (self.resource_positions[i][1] - self.arena_constraints["y_min"] + 0.5) * self.scale)

    return self.viewer.render(return_rgb_array=mode == 'rgb_array')
def reset(self): """ """ # Make sure agents and resources will all fit in the environment assert self.num_agents <= self.arena_constraints[ "x_max"] * self.nest_size, "Not enough room in the nest for all agents" assert self.default_num_resources <= self.arena_constraints[ "x_max"] * self.source_size, "Not enough room in the source for all resources" try: self.viewer.close() except: pass self.viewer = None self.resource_positions = [None for i in range(self.default_num_resources)] self.resource_carried_by = [[] for i in range(self.default_num_resources)] try: self.resource_transforms = [rendering.Transform() for i in range(self.default_num_resources)] except: pass self.latest_resource_id = self.default_num_resources - 1 # Creates empty state self.agent_map = self.generate_arena() # Empty agent map self.resource_map = self.generate_arena() # Empty resource map # Places all agents for i in range(self.num_agents): agent_placed = False while not agent_placed: x, y = self.generate_agent_position() if self.agent_map[y][x] == 0: self.agent_map[y][x] = i + 1 self.agent_positions[i] = (x, y) agent_placed = True # Places all resources for i in range(self.default_num_resources): resource_placed = False while not resource_placed: x, y = self.generate_resource_position() if self.resource_map[y][x] == 0: self.resource_map[y][x] = i + 1 self.resource_positions[i] = (x, y) resource_placed = True ''' # Places straight line of resources for i in range(self.default_num_resources): x, y = i, self.arena_constraints["y_max"]-1 self.resource_map[y][x] = i + 1 self.resource_positions[i] = (x,y) ''' # NOTE: To change this, must also change the observation space in __init__ self.state = np.concatenate((self.agent_map, self.resource_map), axis=0) # Reset variables that were changed during runtime self.has_resource = [None for i in range(self.num_agents)] self.current_num_resources = self.default_num_resources # return np.array(self.state) return self.get_agent_observations()
def __init__(self, parameter_filename=None):
    """
    Initialises constants and variables for agents, resources and environment

    :param parameter_filename: path to the JSON parameter file describing
        the environment (keys under ["environment"] and ["general"])
    :raises RuntimeError: if no parameter file is specified
    """
    if parameter_filename is None:
        raise RuntimeError("No parameter file specified for the environment")

    parameter_dictionary = json.loads(open(parameter_filename).read())

    # Older parameter files may not specify an observation version
    try:
        self.observation_version = parameter_dictionary['environment']['observation_version']
    except KeyError:
        self.observation_version = "complex"

    # Environment dimensions
    self.arena_constraints = {"x_min": 0,
                              "x_max": parameter_dictionary['environment']['arena_width'],
                              "y_min": 0,
                              "y_max": parameter_dictionary['environment']['arena_length']}
    self.nest_size = parameter_dictionary['environment']['cache_start']
    self.cache_size = parameter_dictionary['environment']['slope_start'] - parameter_dictionary['environment'][
        'cache_start']
    self.slope_size = parameter_dictionary['environment']['source_start'] - parameter_dictionary['environment'][
        'slope_start']
    self.source_size = parameter_dictionary['environment']['arena_length'] - parameter_dictionary['environment'][
        'source_start']
    self.nest_start = self.arena_constraints["y_min"]
    self.cache_start = parameter_dictionary['environment']['cache_start']
    self.slope_start = parameter_dictionary['environment']['slope_start']
    self.source_start = parameter_dictionary['environment']['source_start']
    self.num_arena_tiles = self.arena_constraints["x_max"] * self.arena_constraints["y_max"]
    self.sliding_speed = parameter_dictionary['environment']['sliding_speed']

    # agent constants
    self.agent_width = 0.8
    self.sensor_range = parameter_dictionary['environment']['sensor_range']

    # Resource constants
    self.resource_width = 0.6
    self.base_cost = parameter_dictionary['environment']['base_cost']
    self.reward_for_resource = parameter_dictionary['environment']['resource_reward']
    self.upward_cost_factor = parameter_dictionary['environment']['upward_cost_factor']
    self.downward_cost_factor = parameter_dictionary['environment']['downward_cost_factor']
    self.carry_factor = parameter_dictionary['environment']['carry_factor']

    # Other constants and variables
    self.num_agents = parameter_dictionary['environment']['num_agents']
    self.default_num_resources = parameter_dictionary['environment']['num_resources']
    self.current_num_resources = self.default_num_resources
    self.latest_resource_id = self.default_num_resources - 1
    self.dumping_position = (-10, -10)

    # Rendering constants
    self.scale = 50  # Scale for rendering
    self.nest_colour = [0.25, 0.25, 0.25]
    self.cache_colour = [0.5, 0.5, 0.5]
    self.slope_colour = [0.5, 0.25, 0.25]
    self.source_colour = [0.25, 0.5, 0.5]
    self.agent_colour = [0, 0, 0.25]
    self.resource_colour = [0, 0.25, 0]

    # Rendering variables. Rendering may be unavailable (e.g. headless
    # machine); the environment must stay usable without it, hence the
    # except Exception guard (was a bare `except:`).
    self.viewer = None
    self.agent_transforms = None
    self.resource_transforms = None

    try:
        self.agent_transforms = [rendering.Transform() for i in range(self.num_agents)]
        self.resource_transforms = [rendering.Transform() for i in range(self.default_num_resources)]
    except Exception:
        pass

    self.agent_positions = [None] * self.num_agents
    self.resource_positions = [None] * self.default_num_resources
    # BUGFIX: the original used `[[]] * self.default_num_resources`, which
    # creates N references to ONE shared list — appending a carrier to one
    # resource would show up on every resource. Build independent lists.
    self.resource_carried_by = [[] for _ in range(self.default_num_resources)]

    # Step variables
    self.behaviour_map = [self.forward_step, self.backward_step, self.left_step, self.right_step]
    self.action_name = ["FORWARD", "BACKWARD", "LEFT", "RIGHT", "PICKUP", "DROP"]
    self.has_resource = [None for i in range(self.num_agents)]

    self.seed_value = parameter_dictionary['general']['seed']
    self.np_random = np.random.RandomState(self.seed_value)

    # Observation space (additional details explained in self.get_agent_observations())
    # Range=1 -> 9 tiles. Range=2 -> 25 tiles. Agent at the center.
    self.tiles_in_sensing_range = (2 * self.sensor_range + 1) ** 2

    if self.observation_version == "simple":
        # 1 bit for each tile in range + 4 bits for location + 1 bit for object detection + 1 bit for object possession
        self.observation_space_size = self.tiles_in_sensing_range + 4 + 1 + 1
    else:
        # Tiles in sensing range are onehotencoded + 4 bits for location + 1 bit for object possession
        self.observation_space_size = self.tiles_in_sensing_range * 4 + 4 + 1

    # Action space
    # 0- Forward, 1- Backward, 2- Left, 3- Right, 4- Pick up, 5- Drop
    self.action_space_size = 6
def render(self, mode='human'):
    """
    Renders the environment, placing all agents and boxes in appropriate positions

    On the first call the viewer and all geometry (home/main/goal bands,
    grid, agent triangles, box rectangles) are created; later calls only
    move the existing transforms.

    :param mode: 'human' for a window, 'rgb_array' to get back a pixel array
    :return: result of viewer.render (an RGB array when mode == 'rgb_array')
    """
    screen_width = self.arena_constraints["x_max"] * self.scale
    screen_height = self.arena_constraints["y_max"] * self.scale

    if self.viewer is None:
        self.viewer = rendering.Viewer(screen_width, screen_height)

        home_top = self.arena_constraints["y_min"] + self.home_length
        main_top = self.arena_constraints["y_max"] - self.goal_length
        goal_top = self.arena_constraints["y_max"]

        # Draw home
        home = self.draw_arena_segment(home_top, self.arena_constraints["y_min"],
                                       self.home_colour)
        self.viewer.add_geom(home)

        # Draw main
        main_area = self.draw_arena_segment(main_top, home_top, self.main_colour)
        self.viewer.add_geom(main_area)

        # Draw goal
        goal = self.draw_arena_segment(goal_top, main_top, self.goal_colour)
        self.viewer.add_geom(goal)

        # Draw grid
        grid_lines = self.draw_grid()
        for line in grid_lines:
            self.viewer.add_geom(line)

        # Draw agent(s) — a triangle pointing "up", recentred so its
        # transform moves it around its middle.
        for i in range(self.num_agents):
            agent = rendering.FilledPolygon([
                (0, 0), (self.agent_width * self.scale, 0),
                (self.agent_width / 2 * self.scale, self.agent_height * self.scale)
            ])
            agent.set_color(self.agent_colour[0], self.agent_colour[1],
                            self.agent_colour[2])
            agent.add_attr(
                rendering.Transform(
                    translation=(-self.agent_width / 2 * self.scale,
                                 -self.agent_height / 2 * self.scale)))
            agent.add_attr(self.agent_transforms[i])
            self.viewer.add_geom(agent)

        # Draw box(es). Box indices run small, then medium, then large
        # (num_medium_boxes and num_large_boxes are mutually exclusive per
        # __init__, so the two `i >= num_small_boxes` branches never both apply).
        # NOTE(review): the rectangle height `t` uses small_box_width for the
        # medium and large boxes as well — possibly a deliberate uniform
        # height, but it looks like a copy-paste; confirm.
        for i in range(self.num_small_boxes + self.num_medium_boxes +
                       self.num_large_boxes):
            if i < self.num_small_boxes and self.num_small_boxes > 0:
                l, r, t, b = -self.small_box_width / 2 * self.scale, self.small_box_width / 2 * self.scale, self.small_box_width * self.scale, 0
            elif i >= self.num_small_boxes and self.num_medium_boxes > 0:
                l, r, t, b = -self.medium_box_width / 2 * self.scale, self.medium_box_width / 2 * self.scale, self.small_box_width * self.scale, 0
            elif i >= self.num_small_boxes and self.num_large_boxes > 0:
                l, r, t, b = -self.large_box_width / 2 * self.scale, self.large_box_width / 2 * self.scale, self.small_box_width * self.scale, 0

            box = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)])
            box.set_color(self.box_colour[0], self.box_colour[1],
                          self.box_colour[2])
            box.add_attr(rendering.Transform(translation=(0, 0)))
            box.add_attr(self.box_transforms[i])
            self.viewer.add_geom(box)

    # Set position of agent(s)
    for i in range(self.num_agents):
        self.agent_transforms[i].set_translation(
            (self.agent_positions[i][0] - self.arena_constraints["x_min"] + 0.5) * self.scale,
            (self.agent_positions[i][1] - self.arena_constraints["y_min"] + 0.5) * self.scale)

    # Set position of box(es) — the x offset centres each box according to
    # its size (large boxes span num_agents tiles).
    for box_id, box_details in self.boxes_in_arena.items():
        if box_details[2] == "small":
            self.box_transforms[box_id].set_translation(
                (box_details[0] - self.arena_constraints["x_min"] + 0.5) * self.scale,
                (box_details[1] - self.arena_constraints["y_min"] + 0.125) * self.scale)
        elif box_details[2] == "medium":
            self.box_transforms[box_id].set_translation(
                (box_details[0] - self.arena_constraints["x_min"] + (0.5 * self.medium_box_size)) * self.scale,
                (box_details[1] - self.arena_constraints["y_min"] + 0.125) * self.scale)
        elif box_details[2] == "large":
            self.box_transforms[box_id].set_translation(
                (box_details[0] - self.arena_constraints["x_min"] + (0.5 * self.num_agents)) * self.scale,
                (box_details[1] - self.arena_constraints["y_min"] + 0.125) * self.scale)

    return self.viewer.render(return_rgb_array=mode == 'rgb_array')
def __init__(self, parameter_filename=None):
    """
    Initialise the box-pushing environment from a JSON parameter file.

    :param parameter_filename: path to the JSON file with the environment
        parameters (read from ["environment"]["box_pushing"])
    :raises RuntimeError: if no parameter file is given, or if time_scaling
        is neither "constant" nor "variable"
    """
    if parameter_filename is None:
        raise RuntimeError(
            "No parameter file specified for the environment")

    parameter_dictionary = json.loads(open(parameter_filename).read())

    self.seed_value = parameter_dictionary['environment']['box_pushing'][
        'env_seed']
    self.np_random = np.random.RandomState(self.seed_value)

    # Cooperation parameters.
    # NOTE(review): each boolean flag below is only assigned when the JSON
    # value is exactly "True" or "False"; any other value leaves the
    # attribute unset and will raise AttributeError on first use — confirm
    # inputs are validated upstream.
    if parameter_dictionary['environment']['box_pushing'][
            'defection'] == "True":
        self.defection = True
    elif parameter_dictionary['environment']['box_pushing'][
            'defection'] == "False":
        self.defection = False

    if parameter_dictionary['environment']['box_pushing'][
            'partial_cooperation'] == "True":
        self.partial_cooperation = True
    elif parameter_dictionary['environment']['box_pushing'][
            'partial_cooperation'] == "False":
        self.partial_cooperation = False

    self.sensing = parameter_dictionary['environment']['box_pushing'][
        'sensing']

    if parameter_dictionary['environment']['box_pushing'][
            'specialisation'] == "True":
        self.specialisation = True
    elif parameter_dictionary['environment']['box_pushing'][
            'specialisation'] == "False":
        self.specialisation = False

    self.environment_scaling = parameter_dictionary['environment'][
        'box_pushing']['environment_scaling']

    if parameter_dictionary['environment']['box_pushing'][
            'reward_sharing'] == "True":
        self.reward_sharing = True
    elif parameter_dictionary['environment']['box_pushing'][
            'reward_sharing'] == "False":
        self.reward_sharing = False

    self.time_scaling = parameter_dictionary['environment']['box_pushing'][
        'time_scaling']

    if parameter_dictionary['environment']['box_pushing'][
            'sparse_rewards'] == "True":
        self.sparse_rewards = True
    elif parameter_dictionary['environment']['box_pushing'][
            'sparse_rewards'] == "False":
        self.sparse_rewards = False

    # Environment — the arena width scales with the number of agents
    self.num_agents = parameter_dictionary['environment']['box_pushing'][
        'num_agents']
    self.arena_length = parameter_dictionary['environment']['box_pushing'][
        'arena_length']
    self.arena_width = parameter_dictionary['environment']['box_pushing'][
        'min_arena_width'] * self.num_agents

    # With defection allowed, each agent gets its own small box
    if self.defection:
        self.num_small_boxes = self.num_agents
    else:
        self.num_small_boxes = 0

    # Medium boxes (partial cooperation) and the single large box are
    # mutually exclusive.
    if self.partial_cooperation:
        self.medium_box_size = parameter_dictionary['environment'][
            'box_pushing']['medium_box_size']
        self.num_medium_boxes = self.num_agents // self.medium_box_size
        self.num_large_boxes = 0
    else:
        self.num_medium_boxes = 0
        self.num_large_boxes = 1  # 1 box pushed by all agents

    if self.time_scaling == "variable":
        self.episode_length = parameter_dictionary['environment'][
            'box_pushing']['min_episode_length'] * self.num_agents
    elif self.time_scaling == "constant":
        self.episode_length = parameter_dictionary['environment'][
            'box_pushing']['min_episode_length']
    else:
        raise RuntimeError(
            "Time scaling must be either constant or variable")

    self.num_episodes = parameter_dictionary['environment']['box_pushing'][
        'num_episodes']

    self.arena_constraints = {
        "x_min": 0,
        "x_max": self.arena_width,
        "y_min": 0,
        "y_max": self.arena_length
    }

    self.home_length = self.goal_length = 1
    self.agent_width = 0.8
    self.small_box_width = 0.8

    # medium_box_width is only defined (and only needed) when medium boxes exist
    if self.partial_cooperation:
        self.medium_box_width = self.medium_box_size - 0.2

    self.large_box_width = self.num_agents - 0.2
    self.agent_height = 0.4

    # Rewards
    self.large_reward_per_agent = parameter_dictionary['environment'][
        'box_pushing']['large_reward_per_agent']
    self.small_reward_per_agent = parameter_dictionary['environment'][
        'box_pushing']['small_reward_per_agent']
    self.cost_per_time_step = parameter_dictionary['environment'][
        'box_pushing']['cost_per_time_step']

    # Rendering constants
    self.scale = 40
    self.goal_colour = [0.5, 0.5, 0.5]
    self.main_colour = [0.25, 0.5, 0.5]
    self.home_colour = [0.5, 0.6, 0.6]
    self.agent_colour = [0, 0, 0.25]
    self.box_colour = [0, 0.25, 0]

    # Rendering variables — presumably the try/except guards against the
    # rendering module being unavailable (headless run); TODO confirm.
    self.viewer = None
    self.agent_transforms = None
    self.box_transforms = None

    try:
        self.agent_transforms = [
            rendering.Transform() for _ in range(self.num_agents)
        ]
        self.box_transforms = [
            rendering.Transform()
            for _ in range(self.num_large_boxes + self.num_medium_boxes +
                           self.num_small_boxes)
        ]
    except:
        pass

    self.agent_positions = [None] * self.num_agents
    self.boxes_in_arena = {}

    # Step variables
    self.behaviour_map = [
        self.forward, self.rotate_right, self.rotate_left, self.stay
    ]
    self.action_name = ["FORWARD", "ROTATE RIGHT", "ROTATE LEFT", "STAY"]

    # Observation space
    # NOTE(review): only defined for sensing == "local"; other sensing
    # values leave observation_space_size unset — confirm intended.
    if self.sensing == "local":
        # Onehotencoded vector, with possibilities for each of the 3 box sizes, an agent, a wall, or an empty spot
        self.observation_space_size = 6

    # Action space
    # 0- Forward, 1- Rotate right, 2- Rotate left, 3- Stay
    self.action_space_size = 4

    # Given an agent's orientation, add these values to the agent's x and y to get the block in front of them
    self.orientation_map = {
        "NORTH": (0, 1),
        "SOUTH": (0, -1),
        "EAST": (1, 0),
        "WEST": (-1, 0)
    }
def __init__(self, parameter_filename=None):
    """
    Initialises constants and variables for agents, resources and environment

    :param parameter_filename: path to the JSON parameter file describing
        the environment (keys under ["environment"]["slope"])
    :raises RuntimeError: if no parameter file is given, or if
        incremental_rewards is not "True"/"False"
    """
    if parameter_filename is None:
        raise RuntimeError(
            "No parameter file specified for the environment")

    parameter_dictionary = json.loads(open(parameter_filename).read())

    # Environment dimensions
    self.arena_constraints = {
        "x_min": 0,
        "x_max": parameter_dictionary['environment']['slope']['arena_width'],
        "y_min": 0,
        "y_max": parameter_dictionary['environment']['slope']['arena_length']
    }
    self.nest_size = parameter_dictionary['environment']['slope'][
        'cache_start']
    self.cache_size = parameter_dictionary['environment']['slope'][
        'slope_start'] - parameter_dictionary['environment']['slope'][
        'cache_start']
    self.slope_size = parameter_dictionary['environment']['slope'][
        'source_start'] - parameter_dictionary['environment']['slope'][
        'slope_start']
    self.source_size = parameter_dictionary['environment']['slope'][
        'arena_length'] - parameter_dictionary['environment']['slope'][
        'source_start']
    self.nest_start = self.arena_constraints["y_min"]
    self.cache_start = parameter_dictionary['environment']['slope'][
        'cache_start']
    self.slope_start = parameter_dictionary['environment']['slope'][
        'slope_start']
    self.source_start = parameter_dictionary['environment']['slope'][
        'source_start']
    self.num_arena_tiles = self.arena_constraints[
        "x_max"] * self.arena_constraints["y_max"]
    self.sliding_speed = parameter_dictionary['environment']['slope'][
        'sliding_speed']

    # agent constants
    self.agent_width = 0.8
    self.sensor_range = parameter_dictionary['environment']['slope'][
        'sensor_range']

    # Resource constants
    self.resource_width = 0.6
    self.base_cost = parameter_dictionary['environment']['slope'][
        'base_cost']
    self.reward_for_resource = parameter_dictionary['environment'][
        'slope']['resource_reward']
    self.upward_cost_factor = parameter_dictionary['environment']['slope'][
        'upward_cost_factor']
    self.downward_cost_factor = parameter_dictionary['environment'][
        'slope']['downward_cost_factor']
    self.carry_factor = parameter_dictionary['environment']['slope'][
        'carry_factor']

    # Other constants and variables
    self.num_agents = parameter_dictionary['environment']['slope'][
        'num_agents']
    self.default_num_resources = parameter_dictionary['environment'][
        'slope']['num_resources']
    self.episode_length = parameter_dictionary['environment']['slope'][
        'episode_length']
    # It is impossible to collect this many resources
    self.max_resources = self.episode_length * self.num_agents
    self.current_num_resources = self.default_num_resources
    self.latest_resource_id = self.default_num_resources - 1

    if parameter_dictionary['environment']['slope'][
            'incremental_rewards'] == "True":
        self.incremental_rewards = True
    elif parameter_dictionary['environment']['slope'][
            'incremental_rewards'] == "False":
        self.incremental_rewards = False
    else:
        # BUGFIX: the original repeated this raise a second time right
        # after the if/elif/else, which (depending on indentation) either
        # made __init__ raise unconditionally or was dead code. Raise only
        # for an invalid parameter value.
        raise RuntimeError(
            "Incremental rewards is not set to True or False")

    # Rendering constants
    self.scale = 50  # Scale for rendering
    self.nest_colour = [0.25, 0.25, 0.25]
    self.cache_colour = [0.5, 0.5, 0.5]
    self.slope_colour = [0.5, 0.25, 0.25]
    self.source_colour = [0.25, 0.5, 0.5]
    self.agent_colour = [0, 0, 0.25]
    self.resource_colour = [0, 0.25, 0]

    # Rendering variables. Rendering may be unavailable (e.g. headless
    # machine); the environment must stay usable without it, hence the
    # except Exception guard (was a bare `except:`).
    self.viewer = None
    self.agent_transforms = None
    self.resource_transforms = None

    try:
        self.agent_transforms = [
            rendering.Transform() for i in range(self.num_agents)
        ]
        self.resource_transforms = [
            rendering.Transform() for i in range(self.default_num_resources)
        ]
    except Exception:
        pass

    self.agent_positions = [None] * self.num_agents
    self.resources_in_arena = {}
    self.resource_carried_by = [[False for i in range(self.num_agents)]
                                for j in range(self.max_resources)]
    self.closest_y_for_resource = {}
    self.resource_history = [
        {
            "dropped_on_slope": False,  # True/False (False unless the resource was dropped on the slope once)
            "dropper_index": -1,  # index of agent that dropped the resource on the slope
            "collected_from_cache": False,  # True/False (The last time it was picked up, was it picked up on the cache?)
            "collector_index": -1,  # index of agent who last picked it up on the cache (if the resource is delivered, this will also be the agent that delivers it)
            "retrieved": False  # Was the resource retrieved
        } for i in range(self.max_resources)
    ]

    self.agent_trajectories = [[None for _ in range(self.episode_length)]
                               for _ in range(self.num_agents)]
    self.last_trajectory_recorded = 0
    self.total_resources_retrieved = 0

    # Step variables
    self.behaviour_map = [
        self.forward_step, self.backward_step, self.left_step,
        self.right_step
    ]
    self.action_name = [
        "FORWARD", "BACKWARD", "LEFT", "RIGHT", "PICKUP", "DROP"
    ]
    self.has_resource = [None] * self.num_agents

    # self.seed_value = parameter_dictionary['general']['seed']
    self.seed_value = parameter_dictionary['environment']['slope'][
        'env_seed']
    self.np_random = np.random.RandomState(self.seed_value)

    # Observation space (additional details explained in self.get_agent_observations())
    # Range=1 -> 9 tiles. Range=2 -> 25 tiles. Agent at the center.
    self.tiles_in_sensing_range = (2 * self.sensor_range + 1)**2

    # 1 bit for each tile in range + 4 bits for location + 1 bit for object possession
    self.observation_space_size = self.tiles_in_sensing_range + 4 + 1

    # Action space
    # 0- Forward, 1- Backward, 2- Left, 3- Right, 4- Pick up, 5- Drop
    self.action_space_size = 6

    # Novelty constants
    self.bc_measure = parameter_dictionary['environment']['slope'][
        'bc_measure']
    self.avg_pos_for_agent = [[0, 0] for _ in range(self.num_agents)]
    self.agent_action_count = [[0 for _ in range(self.action_space_size)]
                               for _ in range(self.num_agents)]
def reset(self):
    """
    Reset the environment to the start of a new episode.

    Closes any open viewer, clears all resource bookkeeping and history,
    re-places every agent and resource at positions drawn from
    generate_agent_position()/generate_resource_position(), and resets the
    trajectory and behaviour-characterisation accumulators.

    :return: result of self.get_agent_observations() for the fresh state
    """
    # Make sure agents and resources will all fit in the environment
    assert self.num_agents <= self.arena_constraints[
        "x_max"] * self.nest_size, "Not enough room in the nest for all agents"
    assert self.default_num_resources <= self.arena_constraints[
        "x_max"] * self.source_size, "Not enough room in the source for all resources"

    # Best-effort close: viewer may be None or rendering unavailable
    try:
        self.viewer.close()
    except:
        pass

    self.viewer = None
    self.resources_in_arena = {}
    self.resource_carried_by = [[False for i in range(self.num_agents)]
                                for j in range(self.max_resources)]
    self.closest_y_for_resource = {}
    self.resource_history = [{
        "dropped_on_slope": False,
        "dropper_index": -1,
        "collected_from_cache": False,
        "collector_index": -1,
        "retrieved": False
    } for i in range(self.max_resources)]

    try:
        self.resource_transforms = [
            rendering.Transform() for i in range(self.default_num_resources)
        ]
    except:
        pass

    self.latest_resource_id = self.default_num_resources - 1

    # Creates empty state
    self.agent_map = self.generate_arena()  # Empty agent map
    self.resource_map = self.generate_arena()  # Empty resource map

    # Places all agents — rejection-sample positions until a free tile is found
    for i in range(self.num_agents):
        agent_placed = False
        while not agent_placed:
            x, y = self.generate_agent_position()

            if self.agent_map[y][x] == 0:
                self.agent_map[y][x] = i + 1  # map stores id + 1; 0 means empty
                self.agent_positions[i] = (x, y)
                agent_placed = True

    # Places all resources — same rejection-sampling scheme
    for i in range(self.default_num_resources):
        resource_placed = False
        while not resource_placed:
            x, y = self.generate_resource_position()

            if self.resource_map[y][x] == 0:
                self.resource_map[y][x] = i + 1
                self.resources_in_arena[i] = (x, y)
                self.closest_y_for_resource[i] = y
                resource_placed = True

    # Reset variables that were changed during runtime
    self.has_resource = [None] * self.num_agents
    self.current_num_resources = self.default_num_resources
    self.agent_trajectories = [[None for _ in range(self.episode_length)]
                               for _ in range(self.num_agents)]

    # Record each agent's starting y-coordinate as time step 0 of its trajectory
    for agent_id in range(self.num_agents):
        self.agent_trajectories[agent_id][0] = self.agent_positions[
            agent_id][1]

    self.last_trajectory_recorded = 0
    self.total_resources_retrieved = 0

    # Reset BC (behaviour characterisation accumulators used for novelty)
    self.avg_pos_for_agent = [[0, 0] for _ in range(self.num_agents)]
    self.agent_action_count = [[0 for _ in range(self.action_space_size)]
                               for _ in range(self.num_agents)]

    return self.get_agent_observations()