def __init__(self, position, grid_size, play):
    """
    Set up an agent with a zero-initialised Q-table for a grid.

    :param position: starting position; stored after being passed through
        ``self.encode_position``.
    :param grid_size: object exposing ``x`` and ``y`` (grid width and height).
    :param play: stored unchanged on the instance as ``self.play``.
    """
    self.play = play
    self.grid_size = grid_size

    # The state space holds two entries per grid cell plus one extra entry.
    self.number_state = 2 * grid_size.x * grid_size.y + 1
    # One action per direction returned by Direction.cardinal().
    self.number_actions = len(Direction.cardinal())

    # Q-table: one row per state, one column per action, all zeros.
    table_shape = (self.number_state, self.number_actions)
    self.q = np.zeros(table_shape)

    self.cardinal = Direction.cardinal()
    self.state_id = 0
    # Encoded last, once the other attributes are already in place.
    self.position = self.encode_position(position)
def __init__(self, position, initial_state, terminal_state, grid_size_option, play):
    """
    Set up an agent whose Q-table covers a single zone of the grid.

    :param position: starting position; stored after being passed through
        ``self.get_position``.
    :param initial_state: stored unchanged as ``self.initial_state``.
    :param terminal_state: stored unchanged as ``self.terminal_state``.
    :param grid_size_option: size of the zone (not of the whole grid);
        must expose ``x`` and ``y``.
    :param play: stored unchanged as ``self.play``.
    """
    self.play = play
    self.grid_size_option = grid_size_option

    # One state per cell of the zone.
    self.number_state = grid_size_option.x * grid_size_option.y
    # One action per direction returned by Direction.cardinal().
    self.number_actions = len(Direction.cardinal())

    # Q-table: one row per state, one column per action, all zeros.
    table_shape = (self.number_state, self.number_actions)
    self.q = np.zeros(table_shape)

    self.cardinal = Direction.cardinal()
    self.position = self.get_position(position)
    self.initial_state = initial_state
    self.terminal_state = terminal_state
def _next_state(self, state, direction):
    """
    Compute the successor of ``state`` when the hero attempts ``direction``.

    The input ``state`` dict is not modified; a new dict with the same five
    keys ("hero", "other_objects", "falling_dir", "going_to_die",
    "last_dir") is built and returned.

    :param state: dict describing the current state (keys listed above).
    :param direction: requested Direction, or None for no movement.
    :return: tuple ``(new_state, reward, done, info)`` where reward is 1.0
        when the hero overlaps a 'G' object and 0.0 otherwise, and ``info``
        is always an empty dict.
    """
    hero = state["hero"]
    others = state["other_objects"]

    # Names of the objects touching the hero in each direction of interest
    # (None = overlapping the hero's own cell).
    collisions = {
        d: obj_names(self.world.collision(hero, others, d))
        for d in [None, Direction.S, Direction.SE, Direction.SW]
    }

    new_state = {
        "hero": hero,
        "other_objects": others,
        "falling_dir": state["falling_dir"],
        "going_to_die": state["going_to_die"],
        "last_dir": state["last_dir"],
    }

    # Are we falling? (no floor right below and not on a rope)
    if 'F' not in collisions[Direction.S] and 'R' not in collisions[None]:
        # We don't allow actions if not touching the floor or not on a rope.
        direction = None

        if new_state["falling_dir"]:
            # Already falling on entry to this step: mark the fall as fatal.
            new_state["going_to_die"] = True
        elif new_state["last_dir"] in Direction.all_west():
            new_state["falling_dir"] = Direction.SW
        elif new_state["last_dir"] in Direction.all_east():
            new_state["falling_dir"] = Direction.SE
        else:
            new_state["falling_dir"] = Direction.S

        new_state["hero"] = self.move(hero, new_state["falling_dir"],
                                      check_collision_objects=others)
        if hero.pos == new_state["hero"].pos:
            # Position did not change: couldn't move, probably because of a
            # horizontal collision with the floor (hitting a corner while
            # falling). Try going straight down instead.
            new_state["falling_dir"] = Direction.S
            new_state["hero"] = self.move(new_state["hero"], new_state["falling_dir"],
                                          check_collision_objects=others)
    else:
        new_state["falling_dir"] = None
        new_state["going_to_die"] = False

        # On a rope we cannot walk right or left (but we can jump right or
        # left): cancel E/W unless there is floor diagonally below.
        if direction in [Direction.E, Direction.W]:
            floor_diagonally_below = 'F' in collisions[Direction.SE] + collisions[Direction.SW]
            if 'R' in collisions[None] and not floor_diagonally_below:
                direction = None

    new_state["hero"] = self.move(new_state["hero"], direction,
                                  check_collision_objects=others)
    new_state["last_dir"] = direction

    superpositions = obj_names(
        self.world.collision(new_state["hero"], new_state["other_objects"], direction=None))

    if 'G' in superpositions:
        return new_state, 1.0, True, {}

    if new_state["going_to_die"]:
        collisions_south = obj_names(
            self.world.collision(new_state["hero"], new_state["other_objects"],
                                 direction=Direction.S))
        # A fatal fall ends the episode once the hero lands on floor without
        # being on a rope.
        if 'F' in collisions_south and 'R' not in superpositions:
            return new_state, 0.0, True, {}

    return new_state, 0.0, False, {}
class MoveToBeaconEnv(HeroEnv):
    """
    10x10 grid with a hero 'H' and a beacon 'B' placed on distinct random cells.
    """

    # Keyed by (state id, object name); presumably interpreted by
    # HeroEnv.update_world as (next state id, reward, episode ends, callback)
    # -- confirm against the base class.
    STATE_MAP = {(0, 'B'): (0, 1.0, True, None)}
    ACTION_MAP = Direction.cardinal()

    def create_world(self):
        """Build a fresh world, register its hero in the game state, and return the world."""
        hero = self.reset_world()
        self.game_state['hero'] = hero
        return self.world

    def reset_world(self):
        """
        Replace ``self.world`` with a new 10x10 grid holding a hero and a beacon.

        :return: the hero object added to the world.
        """
        self.world = GridWorld((10, 10))

        # NOTE(review): neither quadrant value is used below. The two draws
        # are kept because they advance NumPy's global random stream.
        hero_quadrant = np.random.randint(4)
        remaining_quadrants = list(set(range(4)) - {hero_quadrant})
        beacon_quadrant = np.random.choice(remaining_quadrants)

        hero_pos = self.generate_random_position()
        beacon_pos = self.generate_random_position()
        # Redraw until the beacon does not share the hero's cell.
        while beacon_pos == hero_pos:
            beacon_pos = self.generate_random_position()

        hero = self.world.add_object(
            GridObject('H', hero_pos, Color.green, render_preference=1))
        self.world.add_object(
            GridObject('B', beacon_pos, Color.darkOrange))
        return hero

    def generate_random_position(self):
        """Return a random ``(x, y)`` cell inside the current world's grid."""
        grid = self.world.grid_size
        col = np.random.randint(0, grid.x)
        row = np.random.randint(0, grid.y)
        return (col, row)
class Beacon1DEnv(HeroEnv):
    """
    One-row (10x1) grid: the hero starts at the left end and the beacon sits
    near the right end, with optional walls to the right of the beacon.
    """

    # Keyed by (state id, object name); presumably interpreted by
    # HeroEnv.update_world as (next state id, reward, episode ends, callback)
    # -- confirm against the base class.
    STATE_MAP = {(0, 'B'): (0, 1.0, True, None)}
    ACTION_MAP = Direction.left_right()

    def create_world(self):
        """Build a fresh world, register its hero in the game state, and return the world."""
        self.game_state['hero'] = self.reset_world()
        return self.world

    def reset_world(self):
        """
        Replace ``self.world`` with a new 10x1 grid holding hero, beacon and walls.

        :return: the hero object added to the world.
        """
        self.world = GridWorld((10, 1))
        # NOTE(review): `level` is not defined anywhere in this class; it is
        # presumably a module-level name -- confirm, otherwise this raises
        # NameError.
        locations = self.generate_instance_positions(instance=level)

        hero = self.world.add_object(
            GridObject('H', (0, 0), Color.green, render_preference=1))

        # The last generated position (the leftmost one) hosts the beacon.
        # pop() removes exactly that last element; the previous
        # remove(locations[-1]) searched for the first equal element.
        self.world.add_object(
            GridObject('B', locations.pop(), Color.darkOrange))

        # Every remaining position lies to the right of the goal and becomes
        # a wall. The walls are not set up to block the hero (original note:
        # not really needed right now since the episode ends at the goal).
        while locations:
            self.world.add_object(
                GridObject('W', locations.pop(), Color.white))
        return hero

    def generate_instance_positions(self, instance=0):
        """
        Return ``instance + 1`` positions on row 0, starting at the rightmost
        cell of the grid and moving one cell left per entry.

        :param instance: number of extra positions beyond the first one.
        :return: list of ``(x, 0)`` tuples, rightmost first.
        """
        right_edge = self.world.grid_size.x
        return [(right_edge - (t + 1), 0) for t in range(instance + 1)]
def move(self, obj, direction):
    """
    Try to shift ``obj`` one step in ``direction``, updating ``obj.pos`` in place.

    :param obj: object exposing ``bounding_box`` (indexable pair of corners
        with ``x``/``y``) and ``pos``.
    :param direction: Direction whose ``value`` unpacks to ``(dx, dy)``;
        a falsy value means "do not move".
    :return: False if the step would leave the grid or touches an object
        whose name is in ``self.BLOCKS``; True otherwise (including when
        ``direction`` is falsy and nothing moves).
    """
    if not direction:
        return True

    dx, dy = direction.value
    box = obj.bounding_box
    low_corner, high_corner = box[0], box[1]
    grid = self.world.grid_size

    stays_inside = (low_corner.x + dx >= 0 and high_corner.x + dx <= grid.x
                    and low_corner.y + dy >= 0 and high_corner.y + dy <= grid.y)
    if not stays_inside:
        return False

    touched = self.world.collisions(obj, direction)
    if dx != 0 and dy != 0:
        # Diagonal move: also check both cardinal neighbours before trying
        # to move diagonally (the combined list may contain repeats).
        touched.extend(self.world.collisions(obj, Direction(Point(dx, 0))))
        touched.extend(self.world.collisions(obj, Direction(Point(0, dy))))

    if any(other.name in self.BLOCKS for other in touched):
        return False

    obj.pos += (dx, dy)
    return True
def move_hero(self, direction):
    """
    Move the hero, applying falling and rope rules before delegating to the
    parent class' ``move_hero``.

    While the hero has neither floor ('F') directly below nor a rope ('R')
    on its own cell, the requested ``direction`` is ignored and the hero
    falls instead. Otherwise the falling state is cleared and the requested
    move is attempted, except that E/W are cancelled on a rope with no floor
    diagonally below.

    :param direction: requested Direction, or None for no movement.
    :return: whatever the parent ``move_hero`` returns for the move that was
        finally attempted.
    """
    collisions = self.world.all_collisions(self.game_state["hero"], return_names=True)
    on_floor = 'F' in collisions[Direction.S]
    on_rope = 'R' in collisions[None]

    # Are we falling?
    if not on_floor and not on_rope:
        if self.falling_direction:
            # Already falling on entry to this step: mark the fall as fatal.
            self.going_to_die = True
        elif self.last_direction in Direction.all_west():
            self.falling_direction = Direction.SW
        elif self.last_direction in Direction.all_east():
            self.falling_direction = Direction.SE
        else:
            self.falling_direction = Direction.S

        moved = super(MontezumaEnv, self).move_hero(self.falling_direction)
        if not moved:
            # Couldn't move, probably because of a horizontal collision with
            # the floor (hitting a corner while falling). Try going down.
            self.falling_direction = Direction.S
            moved = super(MontezumaEnv, self).move_hero(self.falling_direction)
        return moved

    # From here on the hero is on the floor or on a rope, so no further
    # "is it grounded?" test is needed: the falling case returned above.
    self.falling_direction = None
    self.going_to_die = False

    # On a rope we cannot walk right or left (but we can jump right or left):
    # cancel E/W unless there is floor diagonally below.
    if direction in [Direction.E, Direction.W]:
        floor_diagonally_below = 'F' in collisions[Direction.SE] + collisions[Direction.SW]
        if on_rope and not floor_diagonally_below:
            direction = None

    self.last_direction = direction
    return super(MontezumaEnv, self).move_hero(direction)
def all_collisions(self, obj: GridObject, objects=None, return_names=False):
    """
    Group the objects colliding with ``obj`` by direction.

    :param obj: object whose ``collides_with(other, direction)`` is queried.
    :param objects: candidates to test; defaults to ``self.objects``. If the
        first element is a ``str`` the sequence is treated as object names
        and resolved through ``self.get_objects_by_names``.
    :param return_names: when True, each list holds the distinct names of
        the colliding objects instead of the objects themselves (order not
        guaranteed, since duplicates are dropped through a set).
    :return: dict with one key per entry of ``Direction.all()`` plus
        ``None``, each mapping to a list.
    """
    if objects is None:
        objects = self.objects  # With all objects

    directions = Direction.all() + [None]
    neighbor_objs = {d: [] for d in directions}

    if len(objects) > 0:
        if type(objects[0]) is str:
            objects = self.get_objects_by_names(objects, self.objects)
        for direction in directions:
            bucket = neighbor_objs[direction]
            for candidate in objects:
                if obj.collides_with(candidate, direction):
                    bucket.append(candidate)

    if return_names:
        for d, found in neighbor_objs.items():
            neighbor_objs[d] = list({item.name for item in found})

    return neighbor_objs
def move(self, obj, direction, check_collision_objects):
    """
    Return ``obj`` shifted one step in ``direction``, or ``obj`` itself when
    the step is not possible. ``obj`` is never modified.

    :param obj: object exposing ``pos`` (indexable x, y) and ``_replace``.
    :param direction: Direction whose ``value`` unpacks to ``(dx, dy)``, or
        None for no movement.
    :param check_collision_objects: objects handed to
        ``self.world.collision`` when looking for obstacles.
    :return: a copy of ``obj`` with the new ``pos``; the original ``obj`` if
        ``direction`` is None, the target cell lies outside ``self.grid_size``,
        or an object named in ``self.block_names`` is in the way.
    """
    if direction is None:
        return obj

    dx, dy = direction.value
    target_x = obj.pos[0] + dx
    target_y = obj.pos[1] + dy

    inside_grid = (target_x >= 0 and target_x < self.grid_size[0]
                   and target_y >= 0 and target_y < self.grid_size[1])
    if not inside_grid:
        return obj

    obstacles = self.world.collision(obj, check_collision_objects, direction)
    if dx != 0 and dy != 0:
        # Diagonal move: also check both cardinal neighbours before trying
        # to move diagonally (the combined list may contain repeats).
        obstacles.extend(
            self.world.collision(obj, check_collision_objects, Direction((dx, 0))))
        obstacles.extend(
            self.world.collision(obj, check_collision_objects, Direction((0, dy))))

    if any(other.name in self.block_names for other in obstacles):
        return obj

    return obj._replace(pos=(target_x, target_y))
def __init__(self, str_map, colors, hero_mark, actions=None, block_marks=None,
             max_moves=200, pixel_size=(84, 84)):
    """
    Create a hero environment described by a list of equally long strings.

    :param str_map: sequence of rows; the grid is ``len(str_map[0])`` wide
        and ``len(str_map)`` high.
    :param colors: stored unchanged as ``self.colors``.
    :param hero_mark: stored unchanged as ``self.hero_mark``.
    :param actions: available actions; defaults to a fresh
        ``[None] + Direction.cardinal()`` list.
    :param block_marks: stored as ``self.block_marks``; defaults to a fresh
        empty dict.
    :param max_moves: forwarded to the parent constructor.
    :param pixel_size: forwarded to the parent constructor.
    """
    # Defaults are created per call: a list/dict literal in the signature
    # would be one shared object reused (and mutable) across all instances.
    if actions is None:
        actions = [None] + Direction.cardinal()
    if block_marks is None:
        block_marks = {}

    self.str_map = str_map
    self.colors = colors
    self.hero_mark = hero_mark
    self.block_marks = block_marks

    super(StrHeroEnv, self).__init__(size=(len(str_map[0]), len(str_map)),
                                     actions=actions,
                                     max_moves=max_moves,
                                     pixel_size=pixel_size)
def __init__(self, size, actions=None, block_names=None, max_moves=None, **kwargs):
    """
    Create a single-hero environment on a grid of the given size.

    :param size: either a two-element iterable or a single value; a value
        that cannot be unpacked (TypeError) is duplicated into
        ``(size, size)``.
    :param actions: available actions; defaults to a fresh
        ``[None] + Direction.cardinal()`` list.
    :param block_names: names of objects the hero cannot pass through;
        defaults to a fresh empty list.
    :param max_moves: forwarded to the parent constructor.
    :param kwargs: forwarded unchanged to the parent constructor.
    """
    # Defaults are created per call: the lists are stored on the instance,
    # so a list literal in the signature would be shared by every instance
    # built with the defaults.
    if actions is None:
        actions = [None] + Direction.cardinal()
    if block_names is None:
        block_names = []

    super(HeroEnv, self).__init__(n_actions=len(actions), max_moves=max_moves, **kwargs)

    try:
        # Only probing that `size` unpacks into two values; they are not used.
        _width, _height = size
    except TypeError:
        size = size, size

    self.grid_size = size
    self.actions = actions
    self.block_names = block_names
def __init__(self, size, **kwargs):
    """
    Create the environment with ``Direction.cardinal()`` as its action set,
    immutable states enabled and a non-fixed initial state.

    :param size: forwarded to the parent constructor as ``size``.
    :param kwargs: forwarded unchanged to the parent constructor.
    """
    fixed_options = dict(
        actions=Direction.cardinal(),
        using_immutable_states=True,
        fixed_init_state=False,
    )
    super(MoveToBeaconEnv, self).__init__(size=size, **fixed_options, **kwargs)
def act(self):
    """
    Pick one of the directions in ``Direction.cardinal()`` uniformly at random.

    Placeholder policy (original note: go random until a new zone is found).

    :return: the chosen Direction.
    """
    cardinal = Direction.cardinal()
    # Bound the draw by the actual number of directions instead of a
    # hard-coded 4, so the index can never fall out of step with the list.
    direction_number = np.random.randint(len(cardinal))
    return cardinal[direction_number]
class HeroEnv(GridEnv):
    """
    Abstract class for environments with a single agent (hero) that can be moved around the grid
    """
    # Default actions: Noop, north, east, south, west.
    ACTION_MAP = [None, ] + Direction.cardinal()
    # Description of the state machine, read by update_world():
    # (state_id, collision_name) -> (new_state_id, reward, end_of_episode, state_change_fn)
    STATE_MAP = dict()
    # Names of objects that cannot be trespassed by the hero.
    BLOCKS = set()

    def __init__(self, max_moves=None, obs_type="image"):
        """
        :param max_moves: optional positive cap on moves per episode; None disables it.
        :param obs_type: forwarded to ``GridEnv.__init__``.
        """
        # Starts as "done" so the environment must be reset before use.
        self.game_state = {'done': True}
        self.max_moves = max_moves
        assert self.max_moves is None or self.max_moves > 0
        GridEnv.__init__(self, len(self.ACTION_MAP), obs_type=obs_type)

    def _clone(self):
        """Return the pair ``(world, game_state)``."""
        # NOTE(review): these are references, not copies. move() updates
        # obj.pos in place, so later moves are presumably visible through a
        # previously returned clone -- confirm whether a deep copy is intended.
        return (self.world, self.game_state)

    def _restore(self, internal_state):
        """Reinstall a ``(world, game_state)`` pair as produced by ``_clone``."""
        self.world = internal_state[0]
        self.game_state = internal_state[1]

    def _reset(self):
        """
        Start a new episode.

        :return: the observation generated from the freshly reset world.
        """
        self.game_state['hero'] = self.reset_world()
        assert self.game_state['hero'] is not None, "Reset world should return hero object."
        self.game_state['state_id'] = 0
        self.game_state['moves'] = 0
        self.game_state['done'] = False
        return self.generate_observation(self.world)

    def move(self, obj, direction):
        """
        Try to shift ``obj`` one step in ``direction``, updating ``obj.pos`` in place.

        :param obj: object exposing ``bounding_box`` and ``pos``.
        :param direction: Direction whose ``value`` unpacks to ``(dx, dy)``;
            a falsy value means "do not move".
        :return: False if the step would leave the grid or touches an object
            named in ``BLOCKS``; True otherwise (including when nothing moves).
        """
        if not direction:
            return True

        dx, dy = direction.value
        bb = obj.bounding_box
        grid = self.world.grid_size
        inside = (bb[0].x + dx >= 0 and bb[1].x + dx <= grid.x
                  and bb[0].y + dy >= 0 and bb[1].y + dy <= grid.y)
        if not inside:
            return False

        others = self.world.collisions(obj, direction)
        if dx != 0 and dy != 0:
            # Diagonal move: also check both cardinal neighbours before
            # trying to move diagonally (the list may contain repeats).
            others.extend(self.world.collisions(obj, Direction(Point(dx, 0))))
            others.extend(self.world.collisions(obj, Direction(Point(0, dy))))

        if any(other.name in self.BLOCKS for other in others):
            return False

        obj.pos += (dx, dy)
        return True

    def move_hero(self, direction):
        """Apply ``move`` to the hero stored in the game state."""
        return self.move(self.game_state['hero'], direction)

    def update_environment(self, action):
        """
        Apply one action and advance the episode by one move.

        :param action: an integer index into ``ACTION_MAP`` or a member of
            ``ACTION_MAP``.
        :return: tuple ``(reward, done, info_dict)`` from ``update_world``.
        :raises Exception: if ``action`` is neither a valid index nor a
            member of ``ACTION_MAP``.
        """
        assert not self.game_state['done'], "The environment needs to be reset."

        if np.issubdtype(type(action), np.integer):
            # Negative indices are rejected too: Python indexing would
            # otherwise wrap around silently and pick an unintended action.
            if not 0 <= action < len(self.ACTION_MAP):
                raise Exception("Action index %s not in ACTION_MAP." % action)
            action = self.ACTION_MAP[action]
        elif action not in self.ACTION_MAP:
            raise Exception("Action %s not in ACTION_MAP. ACTION_MAP: %s" % (action, str(self.ACTION_MAP)))

        self.move_hero(action)
        self.game_state['moves'] += 1
        r, self.game_state['done'], info_dict = self.update_world()
        return r, self.game_state['done'], info_dict

    def update_world(self):
        """
        Run the state machine for every object colliding with the hero.

        :return: tuple ``(reward, end_episode, info)``; ``info`` carries the
            current ``state_id``. When several collisions match, the reward
            and end flag of the last matching one are returned.
        """
        reward = 0.
        end_episode = False
        collisions = self.world.collisions(self.game_state['hero'])
        for collision in collisions:
            # TODO: use neighbors instead of collisions
            key = (self.game_state['state_id'], collision.name)
            try:
                # Only the lookup is guarded, so a KeyError raised inside a
                # state_change_fn is no longer swallowed silently.
                transition = self.STATE_MAP[key]
            except KeyError:
                # No entry for (state, collision_name): keep the state,
                # reward and end-of-episode flag as they are.
                continue
            self.game_state['state_id'], reward, end_episode, state_change_fn = transition
            if state_change_fn:
                state_change_fn(self.world, collision)

        if self.max_moves is not None and self.game_state['moves'] >= self.max_moves:
            end_episode = True

        info = {
            'state_id': self.game_state['state_id']
        }
        return reward, end_episode, info

    def create_world(self):
        """
        Called at init.
        :return: the world (GridworldMap object)
        """
        raise NotImplementedError()

    def reset_world(self):
        """
        Called at every reset().
        :return: the hero (GameObject).
        """
        raise NotImplementedError
class MontezumaEnv(HeroEnv):
    """
    Platform-style level built from a character map: 'F' is floor, 'R' rope,
    'G' the goal and 'H' the hero's starting cell.
    """

    MAP = [
        "..................................",
        ".................RH...............",
        "..............FFFRFF..............",
        ".................R......R.........",
        ".G...............R......R.........",
        "....R............R......R..R......",
        ".FFFRFFF.....FFFFFFF....FFFRFFFF..",
        "....R......................R......",
        "....R......................R......",
        "....R......................R......",
        "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF",
    ]
    HERO_MARK = 'H'
    COLORS = {
        'F': Color.blue,
        'H': Color.yellow,
        'G': Color.green,
        'R': Color.darkOrange
    }
    # Floor cannot be walked through.
    BLOCKS = {'F'}
    # Touching the goal yields reward 1.0 and ends the episode.
    STATE_MAP = {(0, 'G'): (0, 1.0, True, None)}
    ACTION_MAP = Direction.all() + [None]

    def _reset(self):
        """Clear the falling bookkeeping, then perform the parent reset."""
        self.falling_direction = None
        self.going_to_die = False
        self.last_direction = None
        return super(MontezumaEnv, self)._reset()

    def update_world(self):
        """
        End the episode with zero reward when a fatal fall lands on the floor;
        otherwise defer to the parent implementation.

        :return: tuple ``(reward, end_episode, info)``.
        """
        collisions = self.world.all_collisions(self.game_state["hero"], return_names=True)
        landed = 'F' in collisions[Direction.S] and 'R' not in collisions[None]
        if self.going_to_die and landed:
            return 0, True, {}
        return super(MontezumaEnv, self).update_world()

    def move_hero(self, direction):
        """
        Move the hero, applying falling and rope rules before delegating to
        the parent class' ``move_hero``.

        While the hero has neither floor directly below nor a rope on its own
        cell, the requested ``direction`` is ignored and the hero falls.
        Otherwise the falling state is cleared and the requested move is
        attempted, except that E/W are cancelled on a rope with no floor
        diagonally below.

        :param direction: requested Direction, or None for no movement.
        :return: whatever the parent ``move_hero`` returns for the move that
            was finally attempted.
        """
        collisions = self.world.all_collisions(self.game_state["hero"], return_names=True)
        on_floor = 'F' in collisions[Direction.S]
        on_rope = 'R' in collisions[None]

        # Are we falling?
        if not on_floor and not on_rope:
            if self.falling_direction:
                # Already falling on entry to this step: the fall is fatal.
                self.going_to_die = True
            elif self.last_direction in Direction.all_west():
                self.falling_direction = Direction.SW
            elif self.last_direction in Direction.all_east():
                self.falling_direction = Direction.SE
            else:
                self.falling_direction = Direction.S

            moved = super(MontezumaEnv, self).move_hero(self.falling_direction)
            if not moved:
                # Couldn't move, probably because of a horizontal collision
                # with the floor (hitting a corner while falling). Try going
                # straight down.
                self.falling_direction = Direction.S
                moved = super(MontezumaEnv, self).move_hero(self.falling_direction)
            return moved

        # From here on the hero is on the floor or on a rope, so no further
        # "is it grounded?" test is needed: the falling case returned above.
        self.falling_direction = None
        self.going_to_die = False

        # On a rope we cannot walk right or left (but we can jump right or
        # left): cancel E/W unless there is floor diagonally below.
        if direction in [Direction.E, Direction.W]:
            floor_diagonally_below = 'F' in collisions[Direction.SE] + collisions[Direction.SW]
            if on_rope and not floor_diagonally_below:
                direction = None

        self.last_direction = direction
        return super(MontezumaEnv, self).move_hero(direction)

    def create_world(self):
        """
        Parse ``MAP`` once, keep the result as the pristine initial world and
        return a deep copy of it.
        """
        _, self.init_state_world = create_world_from_string_map(
            self.MAP, self.COLORS, self.HERO_MARK)
        return deepcopy(self.init_state_world)

    def reset_world(self):
        """
        Replace ``self.world`` with a deep copy of the pristine initial world.

        :return: the first object in the new world named ``HERO_MARK``.
        """
        self.world = deepcopy(self.init_state_world)
        hero = self.world.get_objects_by_names(self.HERO_MARK)[0]
        return hero
def __init__(self, **kwargs):
    """
    Create the environment sized after ``self.MAP``, with every direction
    plus the no-op (None) as actions and 'F' objects as obstacles.

    :param kwargs: forwarded unchanged to the parent constructor.
    """
    every_move = Direction.all() + [None]
    # Width is the length of the first row, height the number of rows.
    map_width = len(self.MAP[0])
    map_height = len(self.MAP)
    super(MontezumaEnv, self).__init__(size=(map_width, map_height),
                                       actions=every_move,
                                       block_names=['F'],
                                       **kwargs)