def render(self, mode='human', mouse_entered=None, mouse_exit=None, mouse_pressed=None):
    """Render the environment in the requested mode.

    Modes handled here:

    * ``'state'``  - call ``self.print_state()``; returns ``None``.
    * ``'human'``  - append the latest move to ``self.game_file`` and, when
      configured, to ``self.move_file`` and ``self.move_list``; returns ``None``.
    * ``'board'``  - create the viewer window on first use and draw it. Returns
      ``False`` once ``self.should_close`` is set, otherwise the result of
      ``self.viewer.render(False)``.
    * ``'pieces'`` - return a list with the first 64 entries of
      ``self.game_board.pieces``.

    Any other mode falls through and returns ``None``.

    Args:
        mode: Name of the render mode (see above).
        mouse_entered: Optional callable installed as the window's
            ``on_mouse_enter`` handler when the viewer is first created.
        mouse_exit: Optional callable installed as ``on_mouse_leave``.
        mouse_pressed: Optional callable installed as ``on_mouse_press``.
    """
    if mode == 'state':
        self.print_state()
        return None

    if mode == 'human':
        # Local import, in the same spirit as the Viewer import further down.
        from contextlib import ExitStack

        first_player = self.player == 1
        # ExitStack closes every log that was opened, including the move-list
        # file, even when one of the writes raises.
        with ExitStack() as stack:
            game_log = stack.enter_context(open(self.game_file, 'a'))
            tuple_log = None
            index_log = None
            if self.move_file is not None:
                tuple_log = stack.enter_context(open(self.move_file, 'a'))
            if self.move_list is not None:
                index_log = stack.enter_context(open(self.move_list, 'a'))

            if first_player:
                if self.move_count == 1:
                    # Blank lines separate this game from earlier content.
                    game_log.write("\n\n")
                game_log.write(str(self.move_count) + "." + self.move_string + " ")
                if tuple_log is not None:
                    tuple_log.write(str(self.move_count) + "." + str(self.m) + "\n")
            else:
                # Every tenth move of the other player ends the text line.
                if self.move_count % 10 == 0:
                    game_log.write(self.move_string + "\n")
                else:
                    game_log.write(self.move_string + " ")
                if tuple_log is not None:
                    tuple_log.write("..." + str(self.m) + "\n")

            if index_log is not None:
                if first_player and self.move_count == 1:
                    index_log.write("\n\n")
                index_log.write(str(self.n) + ",")
                if self.move_count % 10 == 0 and self.player == -1:
                    index_log.write("\n ")
                else:
                    index_log.write(" ")
        return None

    if mode == 'board':
        if self.viewer is None and not self.should_close:
            from gym.envs.classic_control.rendering import Viewer
            self.viewer = Viewer(self.a, self.b)
            if mouse_entered:
                self.unwrapped.viewer.window.on_mouse_enter = mouse_entered
            if mouse_exit:
                self.unwrapped.viewer.window.on_mouse_leave = mouse_exit
            if mouse_pressed:
                self.unwrapped.viewer.window.on_mouse_press = mouse_pressed
            self.unwrapped.viewer.window.on_close = self.close_window
            # The board geometry is registered once, together with the window.
            self.viewer.add_geom(self.game_board)
        if self.should_close:
            # The viewer may never have been created (or is already gone);
            # only tear down a window that actually exists.
            if self.viewer is not None:
                self.viewer.window.close()
                self.viewer = None
            return False
        return self.viewer.render(False)

    if mode == 'pieces':
        return [self.game_board.pieces[i] for i in range(64)]
def render(self, mode='human'):
    """Draw the current world and hand the frame to the viewer.

    The viewer is created lazily on the first call. Every call then draws,
    in this order: a black background, the 18x18 terrain tiles, stones,
    food, plants, the agent and finally the shade overlays listed in
    ``self.dark_areas``.

    Args:
        mode: When equal to ``'rgb_array'`` the viewer is asked to return
            the frame as an RGB array (``return_rgb_array=True``).

    Returns:
        Whatever ``self.viewer.render(...)`` returns.
    """
    if self.viewer is None:
        self.viewer = Viewer(VIEWPORT_W, VIEWPORT_H)

    # Background covering the whole viewport.
    self.viewer.draw_polygon([
        (0, 0),
        (VIEWPORT_W, 0),
        (VIEWPORT_W, VIEWPORT_H),
        (0, VIEWPORT_H),
    ], color=(0.0, 0.0, 0.0))

    # Terrain
    for r in range(18):
        for c in range(18):
            self.tiles.draw(self.viewer, self.map[r, c], c, r, light=self.light)

    # Stones
    for stone in self.stones:
        stone.draw()

    # Food
    for food in self.foods:
        food.draw()

    # Plants: bucket by row, then draw from the highest row index down to
    # row 0. (The previous loop stopped at row 1 and never drew row 0.)
    rows = [[] for _ in range(18)]
    for plant in self.plants:
        rows[plant.y].append(plant)
    for row in reversed(rows):
        for plant in row:
            plant.draw()

    # Creatures
    self.agent.draw()

    # Shade: each entry is (row, column, repeat count); the shade tile is
    # drawn `num` times at column c and at the mirrored column 17 - c.
    for r, c, num in self.dark_areas:
        for _ in range(num):
            self.tiles.draw(self.viewer, SHADE, c, r)
            self.tiles.draw(self.viewer, SHADE, 17 - c, r)

    return self.viewer.render(return_rgb_array=mode == 'rgb_array')
def render(self, mode='board', mouse_entered=None, mouse_exit=None, mouse_pressed=None):
    """Render the two-player game in the requested mode.

    * ``'human'`` / ``'state'`` - forward the mode to ``self.played.render``
      when ``self.played`` is set; always returns ``True``.
    * ``'board'`` - returns ``False`` when ``self.should_close`` is set.
      Otherwise creates the viewer on first use, installs the mouse and
      close handlers, renders, and returns ``True``.
    * ``'pieces'`` - returns a list with the first 64 entries of
      ``self.board.pieces``.

    Any other mode returns ``None``.

    Args:
        mode: Name of the render mode.
        mouse_entered: Handler for ``on_mouse_enter``; ``self.mouse_enter``
            is used when this is falsy.
        mouse_exit: Handler for ``on_mouse_leave``; ``self.mouse_exit`` is
            used when this is falsy.
        mouse_pressed: Handler for ``on_mouse_press``; ``self.mouse`` is
            used when this is falsy.
    """
    if mode in ('human', 'state'):
        if self.played:
            self.played.render(mode)
        return True

    if mode == 'pieces':
        return [self.board.pieces[square] for square in range(64)]

    if mode != 'board':
        return None

    if self.should_close:
        return False

    if not self.viewer:
        # The window is wider than the board by a//8 + 32 pixels.
        self.viewer = Viewer(self.a + self.a//8 + 32, self.b)
        self.unwrapped.viewer.window.on_mouse_enter = (
            mouse_entered if mouse_entered else self.mouse_enter)
        self.unwrapped.viewer.window.on_mouse_leave = (
            mouse_exit if mouse_exit else self.mouse_exit)
        self.unwrapped.viewer.window.on_mouse_press = (
            mouse_pressed if mouse_pressed else self.mouse)
        self.unwrapped.viewer.window.on_close = self.close_window
        self.viewer.add_geom(self.board)

    try:
        self.viewer.render(False)
    except AttributeError as err:
        # Reported and otherwise ignored; the call still counts as rendered.
        print(err)
    return True
class ChessGameTwoPlayers(gym.Env):
    """Chess game that alternates between two per-side environments.

    Each side is either a ``ChessEnv`` (bot) or a ``PlayerEnv`` (moves are
    taken from mouse clicks). Both sides share the same 64-entry ``state``
    list and the same ``Board`` object.
    """

    metadata = {'render.modes': ['human', 'state', 'board']}

    def __init__(self, board_w=512, board_h=512, bot_white=True, bot_black=True):
        """Create the shared board/state and one environment per side.

        Args:
            board_w: Board width passed to ``Board`` and the side envs.
            board_h: Board height passed to ``Board`` and the side envs.
            bot_white: Use a ``ChessEnv`` for player 1 when true, otherwise
                a ``PlayerEnv``.
            bot_black: Same choice for player -1.
        """
        # Window handles are defined first so that close(), close_window()
        # and __del__ can always read them.
        self.viewer = None
        self.prom_view = None

        self.state = ['.'] * 64
        self.board = Board(width=board_w, height=board_h)
        self.a = board_w
        self.b = board_h
        self.observation_space = spaces.Box(-6, 6, (64,), dtype=int)
        self.action_space = spaces.Discrete(n=100)
        if bot_white:
            self.player1 = ChessEnv(player=1, state=self.state, board=self.board,
                                    board_width=board_w, board_height=board_h)
        else:
            self.player1 = PlayerEnv(player=1, state=self.state, board=self.board)
        if bot_black:
            self.player2 = ChessEnv(player=-1, state=self.state, board=self.board,
                                    board_width=board_w, board_height=board_h)
        else:
            self.player2 = PlayerEnv(player=-1, state=self.state, board=self.board)
        self.playing = self.player1
        self.should_close = False
        self.mouse_on = True
        self.pos = None        # last clicked board square, or None
        self.fr = None         # currently selected origin square
        self.to = None         # destination square of a pending promotion
        self.played = None     # bot env that made the most recent bot move
        self.promotion = False  # True while the promotion menu is open
        self.promoted = None   # piece code picked from the promotion column

    def step(self, action):
        """Advance the game by one call.

        For a bot side, ``action`` is forwarded to that side's ``step`` and
        the turn passes to the other side. For a human side, ``action`` is
        ignored: the move is derived from the last mouse click. If the click
        does not complete a move, the clicked square's moves are highlighted
        and a reward of 0 is returned.

        Returns:
            ``(state, reward, done, info)``.
        """
        if self.playing.is_bot():
            state, reward, done, info = self.playing.step(action)
            self.played = self.playing
            self.switch_player()
            return state, reward, done, info

        move = None
        if self.pos is not None or self.promoted is not None:
            move = self.get_move(self.pos)
            self.board.clear_highlight()

        if move:
            self.board.clear_prom()
            action = self.move_to_action(move)
            state, reward, done, info = self.playing.step(action)
            self.switch_player()
            self.promoted = None
            self.promotion = False
            self.pos = None
            return state, reward, done, info

        # No move was completed. Only highlight when a board square was
        # actually clicked; int(None) used to raise here.
        if self.pos is not None:
            self.highlight_moves(int(self.pos))
        self.pos = None
        if not self.promotion:
            # Discard a promotion-column click made while no promotion is
            # pending, so it cannot be applied to a later promotion.
            self.promoted = None
        state = self.playing.state
        reward = 0
        done = self.playing.move_handler.is_over()
        notation = self.playing.move_handler.to_string(move)
        desc = self.playing.move_handler.get_description(move=move)
        info = {'tuple': move, 'notation': notation, 'description': desc}
        return state, reward, done, info

    def reset(self):
        """Restore the starting position and give the turn to player 1."""
        start = (
            [-3, -5, -4, -2, -1, -4, -5, -3]
            + [-6] * 8
            + ['.'] * 32
            + [6] * 8
            + [3, 5, 4, 2, 1, 4, 5, 3]
        )
        # Written in place: both side envs hold a reference to this list.
        for i in range(64):
            self.state[i] = start[i]
        self.board.reset()
        self.playing = self.player1
        self.playing.compute_moves()

    def render(self, mode='board', mouse_entered=None, mouse_exit=None, mouse_pressed=None):
        """Render the game.

        * ``'human'`` / ``'state'`` - forward to ``self.played.render(mode)``
          when a bot has already moved; returns ``True``.
        * ``'board'`` - returns ``False`` once the window was closed,
          otherwise creates the viewer on first use, renders and returns
          ``True``.
        * ``'pieces'`` - returns the first 64 entries of ``self.board.pieces``.

        The mouse handler arguments override the built-in ``mouse_enter``,
        ``mouse_exit`` and ``mouse`` handlers when they are truthy.
        """
        if mode == 'human' or mode == 'state':
            if self.played:
                self.played.render(mode)
            return True
        elif mode == 'board':
            if self.should_close:
                return False
            if not self.viewer:
                # Extra width (a//8 + 32) beyond the board itself.
                self.viewer = Viewer(self.a + self.a//8 + 32, self.b)
                window = self.unwrapped.viewer.window
                window.on_mouse_enter = mouse_entered if mouse_entered else self.mouse_enter
                window.on_mouse_leave = mouse_exit if mouse_exit else self.mouse_exit
                window.on_mouse_press = mouse_pressed if mouse_pressed else self.mouse
                window.on_close = self.close_window
                self.viewer.add_geom(self.board)
            try:
                self.viewer.render(False)
            except AttributeError as e:
                print(e)
            return True
        elif mode == 'pieces':
            return [self.board.pieces[i] for i in range(64)]

    def switch_player(self):
        """Pass the turn to the other side and compute its moves."""
        if self.playing == self.player1:
            self.playing = self.player2
        else:
            self.playing = self.player1
        self.playing.compute_moves()

    def set_game_file(self, file, move_file=None, move_list=None):
        """Forward the log file paths to both side environments."""
        self.player1.set_game_file(file, move_file, move_list)
        self.player2.set_game_file(file, move_file, move_list)

    def print_state(self):
        """Delegate to the active side's ``print_state``."""
        self.playing.print_state()

    def close_window(self):
        """Window ``on_close`` handler: close open views and flag shutdown."""
        if self.prom_view:
            self.prom_view.close()
            self.prom_view = None
        if self.viewer:
            self.viewer.close()
            self.viewer = None
        self.should_close = True

    def mouse(self, x, y, button, modifiers):
        """Default ``on_mouse_press`` handler.

        Clicks are ignored (and ``pos`` cleared) while a bot is to move. A
        left click records the clicked board square and the promotion choice.
        """
        if self.playing.is_bot():
            self.pos = None
            return
        if button == pyglet.window.mouse.LEFT:
            self.pos = self.mouse_to_pos(x, y)
            self.promoted = self.get_promoted(x, y)

    def get_promoted(self, x, y):
        """Map a click to a promotion piece code.

        Returns ``None`` for clicks left of ``a + 32`` or at ``y >= b``.
        Otherwise the height is split into eight bands that map, bottom to
        top, to -2, -3, -4, -5, 5, 4, 3, 2.
        """
        if x < self.a + 32:
            return None
        codes = (-2, -3, -4, -5, 5, 4, 3)
        for band, code in enumerate(codes, start=1):
            if y < band * self.b // 8:
                return code
        if y < self.b:
            return 2
        return None

    def mouse_enter(self, x, y):
        """Default ``on_mouse_enter`` handler."""
        self.mouse_on = True

    def mouse_exit(self, x, y):
        """Default ``on_mouse_leave`` handler."""
        self.mouse_on = False

    def mouse_to_pos(self, x, y):
        """Convert window coordinates to a board index ``8 * row + col``.

        Returns ``None`` when a coordinate is missing, while a promotion is
        pending, or when the click is outside the 8x8 board. The index is
        computed with float division, so it may be a float; callers convert
        it with ``int``.
        """
        if x is None or y is None:
            return None
        if self.promotion:
            return None
        r = 7 - (y // (self.b/8))
        c = x // (self.a/8)
        if 0 <= r <= 7 and 0 <= c <= 7:
            return 8 * r + c
        return None

    def close(self):
        """Close the viewer window if one is open."""
        if getattr(self, 'viewer', None):
            self.viewer.close()
            self.viewer = None

    def highlight_moves(self, pos):
        """Highlight ``pos`` and the destinations reachable from it.

        Does nothing unless the square holds one of the active side's pieces.
        """
        if self.state[pos] not in self.playing.move_handler.teammate:
            return
        row = pos // 8
        col = pos % 8
        if row % 2 == col % 2:
            selected = Sprite('Textures/Selected_Light.png', self.a/8, self.b/8)
        else:
            selected = Sprite('Textures/Selected_Dark.png', self.a/8, self.b/8)
        self.board.set_highlight(pos, selected)
        for m in self.playing.move_handler.available_moves:
            if m[0] == pos:
                moves = Sprite('Textures/moves.png', self.a / 8, self.b / 8)
                self.board.set_highlight(m[1], moves)

    def get_move(self, pos):
        """Turn the latest click into a move tuple, if it completes one.

        Outside a promotion: clicking an own piece selects it as origin;
        clicking another square returns the available move from the selected
        origin to that square. If that move carries a promotion entry, the
        promotion menu is opened and ``None`` is returned. During a
        promotion, returns ``(fr, to, promoted)`` once a piece was picked.

        Returns:
            A move tuple, or ``None`` when no move is complete.
        """
        if pos is not None:
            pos = int(pos)
        if self.promotion:
            if self.promoted:
                return (self.fr, self.to, self.promoted)
            return None

        if pos is None:
            # Click outside the board while no promotion is pending.
            return None
        if self.state[pos] in self.playing.move_handler.teammate:
            self.fr = pos
        # Compare against None explicitly: square index 0 is a valid origin.
        if self.fr is None:
            return None
        for move in self.playing.move_handler.available_moves:
            if move[0] == self.fr and move[1] == pos:
                self.to = pos
                if move[2]:
                    self.promotion = True
                    self.board.create_prom_menu(self.playing.p)
                    return None
                return move
        return None

    def move_to_action(self, move):
        """Return the index of ``move`` in the available moves, else ``None``."""
        for i, candidate in enumerate(self.playing.move_handler.available_moves):
            if candidate == move:
                return i
        return None

    def close_prom(self):
        """Placeholder hook; only prints a message."""
        print("closing...")

    def __del__(self):
        self.close()
class PlayerEnv(gym.Env):
    """Environment for a side whose moves are chosen externally (not a bot).

    ``render`` previously consisted of a stub whose body was a second, nested
    ``def render`` that was never called, and that nested body read
    attributes this class did not define. It is now a single method, and the
    state it needs (``move_count``, ``m``, ``n``, ``a``, ``b``,
    ``should_close``) is maintained by ``__init__``, ``step`` and ``reset``.
    """

    metadata = {'render.modes': ['human', 'state', 'board']}

    def __init__(self, player, board=None, state=None, board_width=512, board_height=512):
        """Set up the side.

        Args:
            player: Side identifier stored as ``self.p`` and passed to ``Player``.
            board: Shared board object; a new ``Board`` is created when falsy.
            state: Shared 64-entry state list; a fresh list of ``'.'`` is
                created when falsy.
            board_width: Width used for a newly created board and for the
                viewer window opened by ``render('board')``.
            board_height: Height counterpart of ``board_width``.
        """
        # Defined first so close()/__del__ can always read it.
        self.viewer = None
        self.should_close = False
        self.board = board
        if not board:
            self.board = Board(board_width, board_height)
        # NOTE(review): when a board is passed in, these are assumed to match
        # its size; they are only used to size the viewer window.
        self.a = board_width
        self.b = board_height
        self.p = player
        self.state = state
        if not state:
            self.state = ['.'] * 64
        self.observation_space = spaces.Box(-6, 6, (64,), dtype=int)
        self.action_space = spaces.Discrete(n=64)
        self.player = Player(player=self.p, state=self.state)
        self.move_handler = self.player.move_handler
        self.fr = None
        self.to = None
        self.game_file = None
        self.move_file = None
        self.move_list = None
        self.move_string = None
        self.move_count = 0   # number of moves made by this side
        self.m = None         # last move tuple played
        self.n = -1           # index of the last move in available_moves

    def step(self, action):
        """Play the available move with index ``action``.

        Returns:
            ``(state, 0, done, info)`` where ``info`` holds the move tuple,
            its notation and its description.
        """
        move = self.move_handler.available_moves[action]
        self.move_string = self.move_handler.to_string(move)
        state, _ = self.move_handler.move(move=move, action=action, simulate=False)
        self.board.update(move, True)
        # Remembered for render('human').
        self.m = move
        self.n = action
        self.move_count += 1
        done = self.move_handler.is_over()
        desc = self.move_handler.get_description(string=self.move_string)
        return state, 0, done, {'tuple': move, 'notation': self.move_string, 'description': desc}

    def reset(self):
        """Reset the move handler and the per-game move counter."""
        self.player.move_handler.reset()
        self.move_count = 0

    def render(self, mode='human', mouse_entered=None, mouse_exit=None, mouse_pressed=None):
        """Render this side.

        * ``'state'``  - print the board state; returns ``None``.
        * ``'human'``  - append the last move to the configured log files;
          does nothing when no game file is set or no move was made yet.
        * ``'board'``  - open the viewer on first use and draw the board.
          Returns ``False`` once the window was closed, otherwise the result
          of ``self.viewer.render(False)``.
        * ``'pieces'`` - return the first 64 entries of ``self.board.pieces``.

        The optional mouse arguments are installed as window handlers when
        the viewer is created.
        """
        if mode == 'state':
            self.print_state()
            return None

        if mode == 'human':
            if self.game_file is None or self.move_string is None:
                return None
            from contextlib import ExitStack

            first_player = self.p == 1
            with ExitStack() as stack:
                game_log = stack.enter_context(open(self.game_file, 'a'))
                tuple_log = None
                index_log = None
                if self.move_file is not None:
                    tuple_log = stack.enter_context(open(self.move_file, 'a'))
                if self.move_list is not None:
                    index_log = stack.enter_context(open(self.move_list, 'a'))

                if first_player:
                    if self.move_count == 1:
                        game_log.write("\n\n")
                    game_log.write(str(self.move_count) + "." + self.move_string + " ")
                    if tuple_log is not None:
                        tuple_log.write(str(self.move_count) + "." + str(self.m) + "\n")
                else:
                    if self.move_count % 10 == 0:
                        game_log.write(self.move_string + "\n")
                    else:
                        game_log.write(self.move_string + " ")
                    if tuple_log is not None:
                        tuple_log.write("..." + str(self.m) + "\n")

                if index_log is not None:
                    if first_player and self.move_count == 1:
                        index_log.write("\n\n")
                    index_log.write(str(self.n) + ",")
                    if self.move_count % 10 == 0 and self.p == -1:
                        index_log.write("\n ")
                    else:
                        index_log.write(" ")
            return None

        if mode == 'board':
            if self.viewer is None and not self.should_close:
                from gym.envs.classic_control.rendering import Viewer
                self.viewer = Viewer(self.a, self.b)
                if mouse_entered:
                    self.unwrapped.viewer.window.on_mouse_enter = mouse_entered
                if mouse_exit:
                    self.unwrapped.viewer.window.on_mouse_leave = mouse_exit
                if mouse_pressed:
                    self.unwrapped.viewer.window.on_mouse_press = mouse_pressed
                self.unwrapped.viewer.window.on_close = self.close_window
                self.viewer.add_geom(self.board)
            if self.should_close:
                if self.viewer is not None:
                    self.viewer.window.close()
                    self.viewer = None
                return False
            return self.viewer.render(False)

        if mode == 'pieces':
            return [self.board.pieces[i] for i in range(64)]

    def print_state(self, state=None):
        """Print ``state`` (default: ``self.state``) as eight tab-separated rows."""
        if state is None:
            state = self.state
        for row in range(8):
            for col in range(8):
                print(state[8 * row + col], end="\t")
            print()

    def set_game_file(self, file, move_file=None, move_list=None):
        """Store the log file paths used by ``render('human')``."""
        self.game_file = file
        self.move_file = move_file
        self.move_list = move_list

    def is_bot(self):
        """This side is never a bot."""
        return False

    def compute_moves(self):
        """Delegate to the move handler."""
        self.move_handler.compute_moves()

    def close_window(self):
        """Window ``on_close`` handler: flag the viewer for shutdown."""
        self.should_close = True

    def close(self):
        """Close the viewer if one is open."""
        if getattr(self, 'viewer', None):
            self.viewer.close()
            self.viewer = None

    def __del__(self):
        self.close()
class ChessEnv(gym.Env):
    """Single-side chess environment driven by integer actions.

    An action is an index into the move handler's ``available_moves``
    (taken modulo the number of available moves).
    """

    metadata = {'render.modes': ['human', 'state', 'board', 'pieces']}
    # Both tables are indexed directly with a piece code; negative codes
    # index from the end of the list.
    piece_values = [0, 1, 9, 5, 3, 3, 1, 1, 3, 3, 5, 9, 0]
    piece_notations = ['', 'K', 'Q', 'R', 'B', 'N', '', '', 'N', 'B', 'R', 'Q', 'K']

    # 6 = Pawn
    # 5 = Knight
    # 4 = Bishop
    # 3 = Rook
    # 2 = Queen
    # 1 = King
    # . = Empty Space
    # x = en passant
    # Positive means White
    # Negative means Black

    def __init__(self, player=1, state=None, board=None, board_width=512, board_height=512):
        """Create the environment for one side.

        Args:
            player: Side identifier (1 or -1 according to the legend above).
            state: Shared state list, or ``None``.
            board: Shared board object; a new ``Board`` is built when ``None``.
            board_width: Width for a new board and for the viewer window.
            board_height: Height for a new board and for the viewer window.
        """
        self.player = player
        self.state = state
        self.done = False
        self.move_string = None
        self.game_file = None
        self.move_file = None
        self.move_list = None
        self.observation_space = spaces.Box(-6, 6, (64, ), dtype=int)
        self.action_space = spaces.Discrete(n=100)
        self.move_count = 0
        self.nn = None
        self.m = None
        self.n = -1
        self.move_handler = MoveHandler(p=self.player, state=self.state)
        self.move = None
        self.captured_piece = None
        self.en_p = None
        self.viewer = None
        self.game_board = board
        if board is None:
            self.game_board = Board(board_width, board_height, piece_arr)
        self.a = board_width
        self.b = board_height
        self.should_close = False
        self.m_x = 0
        self.m_y = 0

    def step(self, action):
        """Play one move.

        Returns:
            ``(state, reward, done, info)``. When no move is available the
            current state is returned with reward 0 and ``done=True``.
        """
        self.compute_moves()
        move_count = len(self.move_handler.available_moves)
        if move_count == 0:
            return self.state, 0, True, {'tuple': None, 'notation':'', 'description': 'Finished'}
        action = int(str(action))
        # Wrap any integer onto a valid move index.
        action = action % move_count
        self.n = action
        self.move_handler.n = action
        # The notation must be built before the move changes the state.
        self.move_string = self.action_notation(ind=action)
        self.m = self.move_handler.available_moves[action]
        new_state, self.en_p = self.move_handler.move(self.m, action, simulate=False)
        reward = self.compute_reward(action)
        done = self.move_handler.is_over()
        self.game_board.update(self.m, True)
        self.move_count = self.move_count + 1
        self.move_handler.available_moves.clear()
        self.move_handler.opponent_moves.clear()
        return new_state, reward, done, {'tuple': self.m, 'notation': self.move_string,
                                         'description': self.get_description()}

    def reset(self):
        """Restore the starting position, counters, move handler and board."""
        start = (
            [-3, -5, -4, -2, -1, -4, -5, -3]
            + [-6] * 8
            + ['.'] * 32
            + [6] * 8
            + [3, 5, 4, 2, 1, 4, 5, 3]
        )
        if self.state is None:
            self.state = list(start)
        else:
            # Written in place so other holders of this list see the reset.
            for i in range(64):
                self.state[i] = start[i]
        self.done = False
        self.move_count = 0
        self.move_handler.available_moves.clear()
        self.move_handler.opponent_moves.clear()
        self.move_handler.reset()
        self.game_board.reset()

    def render(self, mode='human', player=None, mouse_entered=None, mouse_exit=None, mouse_pressed=None):
        """Render the environment.

        * ``'state'``  - print the board state; returns ``None``.
        * ``'human'``  - append the last move to the configured log files.
          Does nothing when no move was made yet or no game file is set.
        * ``'board'``  - open the viewer on first use and draw the board.
          Returns ``False`` once the window was closed, otherwise the result
          of ``self.viewer.render(False)``.
        * ``'pieces'`` - return the first 64 entries of ``self.game_board.pieces``.

        Args:
            mode: Render mode name.
            player: Side to log the move for; defaults to ``self.player``.
            mouse_entered: Optional ``on_mouse_enter`` handler.
            mouse_exit: Optional ``on_mouse_leave`` handler.
            mouse_pressed: Optional ``on_mouse_press`` handler.
        """
        if mode == 'state':
            self.print_state()
            return None

        if mode == 'human':
            if player is None:
                player = self.player
            if self.move_string is None:
                return None
            if self.game_file is None:
                # render() is also called from diagnostic paths; without a
                # game file there is nothing to append to.
                return None
            from contextlib import ExitStack

            # ExitStack closes every opened log, including the move-list
            # file, even if a write fails.
            with ExitStack() as stack:
                game_log = stack.enter_context(open(self.game_file, 'a'))
                tuple_log = None
                index_log = None
                if self.move_file is not None:
                    tuple_log = stack.enter_context(open(self.move_file, 'a'))
                if self.move_list is not None:
                    index_log = stack.enter_context(open(self.move_list, 'a'))

                if player == 1:
                    if self.move_count == 1:
                        game_log.write("\n\n")
                    game_log.write(str(self.move_count) + "." + self.move_string + " ")
                    if tuple_log is not None:
                        tuple_log.write(str(self.move_count) + "." + str(self.m) + "\n")
                else:
                    if self.move_count % 10 == 0:
                        game_log.write(self.move_string + "\n")
                    else:
                        game_log.write(self.move_string + " ")
                    if tuple_log is not None:
                        tuple_log.write("..." + str(self.m) + "\n")

                if index_log is not None:
                    if player == 1 and self.move_count == 1:
                        index_log.write("\n\n")
                    index_log.write(str(self.n) + ",")
                    if self.move_count % 10 == 0 and player == -1:
                        index_log.write("\n ")
                    else:
                        index_log.write(" ")
            return None

        if mode == 'board':
            if self.viewer is None and not self.should_close:
                from gym.envs.classic_control.rendering import Viewer
                self.viewer = Viewer(self.a, self.b)
                if mouse_entered:
                    self.unwrapped.viewer.window.on_mouse_enter = mouse_entered
                if mouse_exit:
                    self.unwrapped.viewer.window.on_mouse_leave = mouse_exit
                if mouse_pressed:
                    self.unwrapped.viewer.window.on_mouse_press = mouse_pressed
                self.unwrapped.viewer.window.on_close = self.close_window
                self.viewer.add_geom(self.game_board)
            if self.should_close:
                # The viewer may never have been created or may be gone.
                if self.viewer is not None:
                    self.viewer.window.close()
                    self.viewer = None
                return False
            return self.viewer.render(False)

        if mode == 'pieces':
            return [self.game_board.pieces[i] for i in range(64)]

    def set_mouse(self, x, y):
        """Store the given mouse coordinates."""
        self.m_x = x
        self.m_y = y

    def get_mouse(self):
        """Return the stored mouse coordinates as ``(x, y)``."""
        return self.m_x, self.m_y

    def compute_reward(self, action):
        """Compute the reward for the move just played.

        Adds the table value of the piece at the origin of every own
        available move and subtracts it for every opponent move. Adds 10000
        and appends ``#`` to the notation when the opponent is in check with
        no moves left, or adds 100 and appends ``+`` for a plain check. The
        opponent move list is restored afterwards.

        Exits the process if a move origin holds a non-piece value.
        """
        self.compute_moves()
        opp_moves = deepcopy(self.move_handler.opponent_moves)
        self.move_handler.eliminate_move(opponent=True)
        reward = 0
        for move_i in self.move_handler.available_moves:
            try:
                reward = reward + ChessEnv.piece_values[self.state[move_i[0]]]
            except TypeError:
                print(move_i)
                self.render()
                exit(-1)
        for move in self.move_handler.opponent_moves:
            try:
                reward = reward - ChessEnv.piece_values[self.state[move[0]]]
            except TypeError:
                self.render()
                print(move)
                print("action = ", self.n)
                exit(-2)
        if self.move_handler.is_check(opponent=True) and len(self.move_handler.opponent_moves) == 0:
            reward = reward + 10000
            self.move_string = self.move_string + "#"
        elif self.move_handler.is_check(opponent=True):
            reward = reward + 100
            self.move_string = self.move_string + "+"
        self.move_handler.opponent_moves = deepcopy(opp_moves)
        return reward

    def action_notation(self, ind):
        """Build the notation string for available move number ``ind``.

        Must be called before the move is applied, since it reads the
        pre-move board state. Also records the captured piece name in
        ``self.captured_piece`` when the destination holds an opponent piece.

        Returns:
            The notation string, or ``None`` when the origin square is empty
            (or in the unmatched ``'o'`` marker case below).
        """
        indic = 'o'
        pos_from = self.move_handler.available_moves[ind][0]
        pos_to = self.move_handler.available_moves[ind][1]
        promoted = self.move_handler.available_moves[ind][2]
        if self.state[pos_to] == indic and self.state[pos_from] in [1, -1]:
            if pos_from - pos_to == 2:
                return "0-0-0"
            elif pos_from - pos_to == -2:
                return "0-0"
            else:
                return
        if self.state[pos_from] in ['.', 'x']:
            return
        move = ChessEnv.piece_notations[self.state[pos_from]]

        # Disambiguation: look for other pieces of the same code that can
        # reach the same destination.
        row_flag = False   # a rival shares pos_from % 8
        col_flag = False   # a rival shares pos_from // 8
        flag = False       # any rival exists
        for m in self.move_handler.available_moves:
            if m[1] == pos_to and self.state[m[0]] == self.state[pos_from]:
                if m[0] == pos_from:
                    continue
                flag = True
                if m[0] % 8 == pos_from % 8:
                    row_flag = True
                if m[0] // 8 == pos_from // 8:
                    col_flag = True
        if row_flag and col_flag:
            move = move + index_to_pos(pos_from)
        elif row_flag and self.state[pos_from] not in [6, -6]:
            move = move + index_to_pos(pos_from)[1]
        elif col_flag or flag:
            move = move + index_to_pos(pos_from)[0]

        if move == '' and self.state[pos_to] in self.move_handler.opponent:
            # Piece with an empty letter capturing: prefix the first
            # character of the origin square name.
            move = index_to_pos(pos_from)[0]
        if self.state[pos_to] in self.move_handler.opponent:
            # 12 entries so that negative codes (-1..-6) index from the end.
            captured = [None, 'King', 'Queen', 'Rook', 'Bishop', 'Knight', 'Pawn',
                        'Knight', 'Bishop', 'Rook', 'Queen', 'King']
            self.captured_piece = captured[self.state[pos_to]]
            move = move + 'x'
        move = move + index_to_pos(pos_to)
        if self.state[pos_from] in [1, -1]:
            # A king moving two squares sideways is written as castling.
            if pos_from - pos_to == 2:
                move = "0-0-0"
            elif pos_from - pos_to == -2:
                move = "0-0"
        if promoted not in [0, '.', 'x', '', None]:
            move = move + '=' + ChessEnv.piece_notations[promoted]
        return move

    def set_state(self, state=None):
        """Replace the state, or reset when ``state`` is ``None``.

        The move handler is given the resulting state in both cases.
        """
        if state is None:
            self.reset()
        else:
            self.state = state
        self.move_handler.set_state(self.state)

    def set_game_file(self, file, move_file=None, move_list=None):
        """Store the log file paths used by ``render('human')``."""
        self.game_file = file
        self.move_file = move_file
        self.move_list = move_list

    def print_state(self, state=None):
        """Print ``state`` (default: ``self.state``) as eight tab-separated rows."""
        if state is None:
            state = self.state
        for row in range(8):
            for col in range(8):
                print(state[8 * row + col], end="\t")
            print()

    def set_nn(self, nn):
        """Store ``nn`` on the environment."""
        self.nn = nn

    def compute_moves(self):
        """Delegate to the move handler."""
        self.move_handler.compute_moves()

    def return_possible_moves(self, player=1):
        """Recompute and return the available moves (``player`` is unused)."""
        self.compute_moves()
        return self.move_handler.available_moves

    def get_description(self):
        """Return the move handler's description of the last notation."""
        return self.move_handler.get_description(string=self.move_string)

    def get_piece_by_code(self, code):
        """Map a notation letter to a piece name; anything else is ``'Pawn'``."""
        pieces = ['King', 'Queen', 'Rook', 'Bishop', 'Knight']
        codes = ['K', 'Q', 'R', 'B', 'N']
        for letter, name in zip(codes, pieces):
            if code == letter:
                return name
        return 'Pawn'

    def is_bot(self):
        """This side is always a bot."""
        return True

    def close(self):
        """Close the viewer window if one is open."""
        if getattr(self, 'viewer', None):
            self.viewer.window.close()
            self.viewer = None

    def close_window(self):
        """Window ``on_close`` handler: flag the viewer for shutdown."""
        self.should_close = True

    def __del__(self):
        self.close()
def generator(width_height=(28, 28),
              object_scale=0.5,
              width_shift_range=0.25,
              height_shift_range=0.25,
              scale_range=1,
              rotate_range=0,
              count=1,
              objects=default_objects):
    """Generate images and labels in a Keras data-generator style.

    Each image contains simple objects in a random pose.

    Args:
        width_height (tuple): ``(width, height)`` of the generated images.
        object_scale (float): Size of the objects as a fraction of the
            image width.
        width_shift_range (float): Range of the random shift of the object
            x position, as a fraction of the image width.
        height_shift_range (float): Range of the random shift of the object
            y position, as a fraction of the image height.
        scale_range (float): Lower bound of the random object scale; the
            scale is drawn uniformly between this value and 1.
        rotate_range (float): Range of the random rotation in degrees.
        count (int): Number of objects drawn into each image.
        objects (list): Descriptions of the objects that can appear. Entry
            ``[1]`` is used as a uniform scale and entry ``[2]`` is a list
            of ``(code, x, y, scale, rotation_degrees)`` parts, where code
            is ``'B'`` (box), ``'T'`` (triangle) or ``'C'`` (circle).

    Yields:
        tuple: ``(image, poses)``. ``image`` is the rendered RGB array and
        ``poses`` is an array with one ``(class, x offset, y offset, scale,
        rotation)`` row per object, rotation in radians.

    Raises:
        ValueError: If an object part uses an unknown geometry code.
            Previously the geometry of the preceding part was silently
            reused in that case.
    """
    viewer = Viewer(*width_height)
    deg_to_rad = np.pi / 180

    while 1:
        viewer.geoms = []
        y_truth = []
        for _ in range(count):
            cls = random.randrange(len(objects))
            obj_scale = objects[cls][1]
            parts = []
            for g, px, py, ps, pr in objects[cls][2]:
                pr *= deg_to_rad
                if g == 'B':
                    geom = FilledPolygon([(-0.5, -0.5), (0.5, -0.5), (0.5, 0.5), (-0.5, 0.5)])
                elif g == 'T':
                    geom = FilledPolygon([(-0.5, -0.5), (0.5, -0.5), (0.5, 0.5)])
                elif g == 'C':
                    geom = make_circle(radius=1, res=30)
                else:
                    raise ValueError("unknown geometry code: %r" % (g,))
                # Part-local placement first, then the object's own scale.
                geom.add_attr(Transform(translation=(px, py), rotation=pr, scale=ps))
                geom.add_attr(Transform(scale=(obj_scale, obj_scale)))
                geom.set_color(.8, .6, .4)
                parts.append(geom)

            # Random pose; the order of these draws is kept so that seeded
            # runs produce the same sequence as before.
            x = random.uniform(-width_shift_range, width_shift_range)
            y = random.uniform(-height_shift_range, height_shift_range)
            s = random.uniform(scale_range, 1)
            r = random.uniform(-rotate_range, rotate_range) * deg_to_rad
            ss = s * object_scale * width_height[0]

            compound = Compound(parts)
            compound.add_attr(
                Transform(translation=((x + 0.5) * width_height[0],
                                       (y + 0.5) * width_height[1]),
                          rotation=r,
                          scale=(ss, ss)))
            viewer.add_geom(compound)
            y_truth.append((cls, x, y, s, r))

        img = viewer.render(return_rgb_array=True)
        yield (img, np.array(y_truth))
def render(self, mode='human'):
    """Draw the world plus the side panel and hand the frame to the viewer.

    The viewer is created lazily on the first call. Every call draws, in
    order: the backgrounds, the 18x18 terrain tiles, stones, food, plants,
    the agent, the shade overlays, the item held by the agent and four
    status bars (health, energy, food, water) with their danger markers.

    Args:
        mode: When equal to ``'rgb_array'`` the viewer is asked to return
            the frame as an RGB array (``return_rgb_array=True``).

    Returns:
        Whatever ``self.viewer.render(...)`` returns.
    """
    if self.viewer is None:
        self.viewer = Viewer(VIEWPORT_W, VIEWPORT_H)

    # Black background over the whole viewport, then a grey square of side
    # VIEWPORT_H anchored at the origin.
    self.viewer.draw_polygon([
        (0, 0),
        (VIEWPORT_W, 0),
        (VIEWPORT_W, VIEWPORT_H),
        (0, VIEWPORT_H),
    ], color=(0.0, 0.0, 0.0))
    self.viewer.draw_polygon([
        (0, 0),
        (VIEWPORT_H, 0),
        (VIEWPORT_H, VIEWPORT_H),
        (0, VIEWPORT_H),
    ], color=(0.3, 0.3, 0.3))

    # Terrain
    for r in range(18):
        for c in range(18):
            self.tiles.draw(self.viewer, self.map[r, c], c, r, light=self.light)

    # Stones
    for stone in self.stones:
        stone.draw()

    # Food
    for food in self.foods:
        food.draw()

    # Plants: bucket by row, then draw from the highest row index down to
    # row 0. (The previous loop stopped at row 1 and never drew row 0.)
    rows = [[] for _ in range(18)]
    for plant in self.plants:
        rows[plant.y].append(plant)
    for row in reversed(rows):
        for plant in row:
            plant.draw()

    # Creatures
    self.agent.draw()

    # Shade: each entry is (row, column, repeat count); the shade tile is
    # drawn `num` times at column c and at the mirrored column 17 - c.
    for r, c, num in self.dark_areas:
        for _ in range(num):
            self.tiles.draw(self.viewer, SHADE, c, r)
            self.tiles.draw(self.viewer, SHADE, 17 - c, r)

    # TODO: draw time scale

    # ---------------------------------
    # Draw what is in hand
    # ---------------------------------
    hand_x, hand_y = 19, 10
    self.tiles.draw(self.viewer, HAND, hand_x, hand_y)
    in_hand = self.agent.what_is_in_hand()
    if in_hand == 1:
        self.tiles.draw(self.viewer, FOOD_IN_HAND, hand_x, hand_y)
    if in_hand == 2:
        self.tiles.draw(self.viewer, WATER_IN_HAND, hand_x, hand_y)
    if in_hand == 3:
        self.tiles.draw(self.viewer, STONE_IN_HAND, hand_x, hand_y)

    # ---------------------------------
    # Draw player stats
    # ---------------------------------
    white = (1.0, 1.0, 1.0)
    red = (0.6, 0.1, 0.1)
    green = (0.1, 0.6, 0.1)
    blue = (0.1, 0.1, 0.8)
    yellow = (0.7, 0.7, 0.2)
    black = (0, 0, 0)
    bright_red = (1, 0, 0)

    bar_coords = [617, 662, 707, 752]
    bar_bottom = 18
    bar_top = bar_bottom + 302

    # Open-topped frame of each bar.
    for x in bar_coords:
        x2 = x + 30
        self.viewer.draw_polyline(
            [(x, bar_top), (x, bar_bottom), (x2, bar_bottom), (x2, bar_top)],
            color=white, linewidth=1)

    # Fill of each bar: 3 pixels per unit of the level, plus 1.
    agent = self.agent
    levels = [
        (agent.health, red),
        (agent.energy, yellow),
        (agent.food, green),
        (agent.water, blue),
    ]
    for x1, (level, colour) in zip(bar_coords, levels):
        x2 = x1 + 29
        y2 = bar_bottom + 3 * level + 1
        self.viewer.draw_polygon(
            [(x1, bar_bottom), (x2, bar_bottom), (x2, y2), (x1, y2)],
            color=colour)

    # Top edge of each bar.
    for x in bar_coords:
        self.viewer.draw_polyline([(x, bar_top), (x + 30, bar_top)], color=white, linewidth=1)

    # Danger markers: a small outlined red tab on both sides of each bar.
    danger_levels = [20, 20, 19 + 3 * 25, 19 + 3 * 25]
    for x, y in zip(bar_coords, danger_levels):
        self.viewer.draw_polyline(
            [(x, y + 2), (x + 8, y + 2), (x + 8, y - 1), (x, y - 1)],
            color=black, linewidth=1)
        self.viewer.draw_polygon(
            [(x, y + 1), (x + 7, y + 1), (x + 7, y - 1), (x, y - 1)],
            color=bright_red)
        self.viewer.draw_polyline(
            [(x + 29, y + 2), (x + 22, y + 2), (x + 22, y - 1), (x + 29, y - 1)],
            color=black, linewidth=1)
        self.viewer.draw_polygon(
            [(x + 29, y + 1), (x + 22, y + 1), (x + 22, y - 1), (x + 29, y - 1)],
            color=bright_red)

    return self.viewer.render(return_rgb_array=mode == 'rgb_array')
class SimpleEnv(gym.Env): """ Action-Space - provided as a single integer 0 = REST 1 = MOVE NORTH 2 = MOVE EAST 3 = MOVE SOUTH 4 = MOVE WEST 5 = PICK UP OBJECT on same tile as agent 6 = PUT DOWN OBJECT on same tile as agent 7 = CONSUME ITEM IN HAND 8 = THROW OBJECT NORTH 9 = THROW OBJECT EAST 10 = THROW OBJECT SOUTH 11 = THROW OBJECT WEST State-Space - provided as a dictionary health - number from 0 to 100 showing the level of health. A health of zero (or less) results in death energy - number from 0 to 100 showing level of energy available. All actions consume energy. Eating food and drinking water replenish energy. Resting replenishes energy. food - number from 0 to 100 showing the level of food in the agent. At food < 25, energy is decreased more quickly than normal. At food <= 0, health decreases over time. water - number from 0 to 100 showing level of water in the agent. At water < 25, energy decreases more quickly than normal. At water <= 0, health decreases rapidly (such that death in 2 days' worth of turns) in_hand - an indicator of what is held by the agent at that time: The possible values are: 0 = Nothing, hand is empty 1 = Food in hand 2 = Water in hand 3 = A Stone is in hand sight - a <2N+1 x 2N+1> matrix of what the agent can see (other than itself). The center square will represent where the player is. There will be an integer value in each cell of the matrix. The site matrix is represented like a math matrix with <rows x colunns> so the point X, Y would be sight_matrix[Y, X]. 
The range of values in the sight matrix will be: 0 = GRASS 1 = BEACH (near danger, yields water) 2 = CLIFF-EDGE (near danger) 3 = FOREST-EDGE (near danger) 4 = ROCK-WALL (impassable) 5 = DROPOFF beyond the CLIFF EDGE (death) 6 = WATER (death) 7 = DARK-FOREST (death) 8 = PLANT-STAGE-1 9 = PLANT-STAGE-2 10 = PLANT-STAGE-3 11 = PLANT-READY-TO-HARVEST (yields food) 12 = FOOD (on the ground which can be picked up -- yields food) 13 = STONE (can be picked up, can be thrown) 14 = PREDATOR (seeks agent, kills agent) """ #----------------------------------------------------------------------------------------------- def __init__(self, seed=2021): self.seed = seed self.viewer = None self.map = np.zeros((18, 18), dtype=int) self.objects = np.zeros((18, 18), dtype=int) self.tiles = Tileset() self.light = 1.0 self.food_id = 0 self.season = 0 self.day = 0 self.time = 0 self.reset() #----------------------------------------------------------------------------------------------- def get_sight_matrix(self, agent, size=2): tile_map = [ 0, 6, 5, 1, 1, 1, 1, 1, 2, 2, 4, 3, 7, -1, 14, 13, 8, 9, 10, 11, -1, 1, 2, 2, 12, -99, -99 ] smat = np.zeros((2 * size + 1, 2 * size + 1), dtype=int) for r in range(2 * size + 1): for c in range(2 * size + 1): x = agent.x - size + c y = agent.y + size - r v = -999 if 0 <= x <= 17 and 0 <= y <= 17: tile = self.map[y, x] v = tile_map[tile] else: if x >= 0 and y >= 0: if x >= y: v = 5 # DROPOFF else: v = 7 # DARK FOREST elif x < 0 and y >= 0: if 17 - x >= y: v = 5 else: v = 7 elif x >= 0 and y < 0: if 17 - x >= y: v = 4 else: v = 5 elif x < 0 and y < 0: if x >= y: v = 4 else: v = 5 smat[r, c] = v for p in self.plants: r = agent.y - p.y + size c = p.x - agent.x + size if 0 <= r <= 2 * size and 0 <= c <= 2 * size: smat[r, c] = 8 + p.stage for f in self.foods: r = agent.y - f.y + size c = f.x - agent.x + size if 0 <= r <= 2 * size and 0 <= c <= 2 * size: smat[r, c] = 12 for s in self.stones: r = agent.y - s.y + size c = s.x - agent.x + size if 0 <= r <= 2 * 
size and 0 <= c <= 2 * size: smat[r, c] = 13 # smat[size, size] = 8 return smat def _get_state(self): state = { 'health': self.agent.health, 'energy': self.agent.energy, 'food': self.agent.food, 'water': self.agent.water, 'in_hand': self.agent.what_is_in_hand(), 'sight': self.get_sight_matrix(self.agent) } return state #----------------------------------------------------------------------------------------------- def step(self, action): """ Takes one step of action in the environment and returns the resulting state and reward information :param action: :return: """ self.agent.step() # lame way to call the agent actions... if action == 0: self.agent.rest() elif action == 1: self.agent.move_north() elif action == 2: self.agent.move_east() elif action == 3: self.agent.move_south() elif action == 4: self.agent.move_west() elif action == 5: self.agent.pick_up() elif action == 6: self.agent.put_down() elif action == 7: self.agent.consume_item() elif action == 8: self.agent.throw_north() elif action == 9: self.agent.throw_east() elif action == 10: self.agent.throw_south() elif action == 11: self.agent.throw_west() for p in self.plants: p.step() for s in self.stones: s.step() for f in self.foods: f.step() state = self._get_state() # this can eventually be a log of information about 'why' different things happened in response to actions debug = {} # Results reward = 1 is_done = False if self.agent.health <= 0: reward = -1000 is_done = True return state, reward, is_done, debug #----------------------------------------------------------------------------------------------- def render(self, mode='human'): if self.viewer is None: self.viewer = Viewer(VIEWPORT_W, VIEWPORT_H) # self.viewer.set_bounds(0, self.VIEWPORT_W, 0, self.VIEWPORT_H) self.viewer.draw_polygon([ (0, 0), (VIEWPORT_W, 0), (VIEWPORT_W, VIEWPORT_H), (0, VIEWPORT_H), ], color=(0.0, 0.0, 0.0)) self.viewer.draw_polygon([ (0, 0), (VIEWPORT_H, 0), (VIEWPORT_H, VIEWPORT_H), (0, VIEWPORT_H), ], color=(0.3, 0.3, 0.3)) # 
Terrain for r in range(18): for c in range(18): self.tiles.draw(self.viewer, self.map[r, c], c, r, light=self.light) # Stones for s in self.stones: s.draw() # Food for f in self.foods: f.draw() # Plants order_queues = [[] for i in range(18)] for p in self.plants: order_queues[p.y].append(p) for y in range(17, 0, -1): for p in order_queues[y]: p.draw() # Creatures self.agent.draw() # Shade for r, c, num in self.dark_areas: for _ in range(num): self.tiles.draw(self.viewer, SHADE, c, r) self.tiles.draw(self.viewer, SHADE, 17 - c, r) # draw time scale # TODO: Timescales #--------------------------------- # Draw what is in hand #--------------------------------- hand_x, hand_y = 19, 10 self.tiles.draw(self.viewer, HAND, hand_x, hand_y) in_hand = self.agent.what_is_in_hand() if in_hand == 1: self.tiles.draw(self.viewer, FOOD_IN_HAND, hand_x, hand_y) if in_hand == 2: self.tiles.draw(self.viewer, WATER_IN_HAND, hand_x, hand_y) if in_hand == 3: self.tiles.draw(self.viewer, STONE_IN_HAND, hand_x, hand_y) #--------------------------------- # draw player stats: #--------------------------------- white = (1.0, 1.0, 1.0) red = (0.6, 0.1, 0.1) green = (0.1, 0.6, 0.1) blue = (0.1, 0.1, 0.8) yellow = (0.7, 0.7, 0.2) black = (0, 0, 0) bright_red = (1, 0, 0) bar_coords = [617, 662, 707, 752] for x in bar_coords: x2 = x + 30 y1 = 18 y2 = 18 + 302 self.viewer.draw_polyline([(x, y2), (x, y1), (x2, y1), (x2, y2)], color=white, linewidth=1) agent = self.agent # Health Level x1 = bar_coords[0] x2 = x1 + 29 y1 = 18 y2 = y1 + 3 * agent.health + 1 self.viewer.draw_polygon([(x1, y1), (x2, y1), (x2, y2), (x1, y2)], color=red) # Energy Level x1 = bar_coords[1] x2 = x1 + 29 y1 = 18 y2 = y1 + 3 * agent.energy + 1 self.viewer.draw_polygon([(x1, y1), (x2, y1), (x2, y2), (x1, y2)], color=yellow) # FOOD Level x1 = bar_coords[2] x2 = x1 + 29 y1 = 18 y2 = y1 + 3 * agent.food + 1 self.viewer.draw_polygon([(x1, y1), (x2, y1), (x2, y2), (x1, y2)], color=green) # WATER Level x1 = bar_coords[3] x2 = x1 + 29 
y1 = 18 y2 = y1 + 3 * agent.water + 1 self.viewer.draw_polygon([(x1, y1), (x2, y1), (x2, y2), (x1, y2)], color=blue) # Top Bar for x in bar_coords: self.viewer.draw_polyline([(x, 320), (x + 30, 320)], color=white, linewidth=1) danger_levels = [20, 20, 19 + 3 * 25, 19 + 3 * 25] for x, y in zip(bar_coords, danger_levels): self.viewer.draw_polyline([(x, y + 2), (x + 8, y + 2), (x + 8, y - 1), (x, y - 1)], color=black, linewidth=1) self.viewer.draw_polygon([(x, y + 1), (x + 7, y + 1), (x + 7, y - 1), (x, y - 1)], color=bright_red) self.viewer.draw_polyline([(x + 29, y + 2), (x + 22, y + 2), (x + 22, y - 1), (x + 29, y - 1)], color=black, linewidth=1) self.viewer.draw_polygon([(x + 29, y + 1), (x + 22, y + 1), (x + 22, y - 1), (x + 29, y - 1)], color=bright_red) return self.viewer.render(return_rgb_array=mode == 'rgb_array') #----------------------------------------------------------------------------------------------- def reset(self): """ This actually does the initialization """ rnd.seed(42) self.season = 0 self.day = 0 self.time = 0 # BACKGROUND TILES for r in range(18): for c in range(18): self.map[r, c] = GRASS for r in range(4, 16): self.map[r, 0] = CLIFF_W self.map[r, 17] = CLIFF_E for c in range(0, 18): self.map[0, c] = ROCK self.map[17, c] = FOREST self.map[16, c] = FORESTEDGE for i in range(0, 3): self.map[i + 1, 0] = ROCK self.map[i + 1, 4] = ROCK self.map[3, i] = ROCK self.map[i + 1, 17] = ROCK self.map[i + 1, 13] = ROCK self.map[3, 17 - i] = ROCK for i in range(5, 13): self.map[1, i] = WATER self.map[2, i] = WATER self.map[3, i] = BEACH_N self.map[15, 0] = CLIFF_SW self.map[15, 17] = CLIFF_SE self.dark_areas = [(1, 1, 4), (1, 2, 3), (1, 3, 2), (2, 1, 3), (2, 2, 2), (2, 3, 1), (3, 3, 1)] # CREATURES self.agent = Agent(self, 1, 9, 9) # PLANTS self.plants = [] for idx in range(12): while True: x, y = rnd.randint(1, 16), rnd.randint(4, 15) if self.objects[y, x] == 0: self.objects[y, x] = PLANT break stage = rnd.randint(0, 3) plant = Plant(self, idx, x, y, 
stage) self.plants.append(plant) # Stones - DISABLED in SIMPLE ENV self.stones = [] # for idx in range(15): # while True: # x, y = rnd.randint(1, 16), rnd.randint(4, 15) # if self.objects[y, x] == 0: # self.objects[y, x] = STONE # break # # stone = Stone(self, idx, rnd.randint(1, 16), rnd.randint(4, 15)) # self.stones.append(stone) # Empty at first, but can be filled as things are set down self.foods = [] return self._get_state() #----------------------------------------------------------------------------------------------- def _cleanup(self): """
class DiscreteEnv(gym.Env): def __init__(self, seed=2021): self.seed = seed self.viewer = None self.map = np.zeros((18, 18), dtype=int) self.tiles = Tileset() self.light = 1.0 self.time = 0 self.day = 0 self.season = 0 def reset(self): self._cleanup() self._init() def step(self, action): """ Takes one step of action in the environment and returns the resulting state and reward information :param action: :return: """ # time update # self.time += 0.5 if self.time < 40: self.light = 1.0 elif self.time < 50: self.light = 1.0 - (self.time - 40) * 0.05 elif self.time < 70: self.light = 0.5 elif self.time < 80: self.light = 0.5 + (self.time - 70) * 0.05 else: self.light = 1.0 self.time = 0 self.day += 1 if self.day >= 20: self.day = 0 self.season += 1 if self.season > 3: self.season = 0 # Results state = [0, 0, 0, 0, 0, 0, 0, 0, 0] reward = 0 is_done = False debug = {} return np.array(state), reward, is_done, debug def render(self, mode='human'): if self.viewer is None: self.viewer = Viewer(VIEWPORT_W, VIEWPORT_H) # self.viewer.set_bounds(0, self.VIEWPORT_W, 0, self.VIEWPORT_H) self.viewer.draw_polygon([ (0, 0), (VIEWPORT_W, 0), (VIEWPORT_W, VIEWPORT_H), (0, VIEWPORT_H), ], color=(0.0, 0.0, 0.0)) # Terrain for r in range(18): for c in range(18): self.tiles.draw(self.viewer, self.map[r, c], c, r, light=self.light) # Stones for s in self.stones: s.draw() # Food for f in self.foods: f.draw() # Plants order_queues = [[] for i in range(18)] for p in self.plants: order_queues[p.y].append(p) for y in range(17, 0, -1): for p in order_queues[y]: p.draw() # Creatures self.agent.draw() # Shade for r, c, num in self.dark_areas: for _ in range(num): self.tiles.draw(self.viewer, SHADE, c, r) self.tiles.draw(self.viewer, SHADE, 17 - c, r) # Highlight return self.viewer.render(return_rgb_array=mode == 'rgb_array') def _init(self): """ This actually does the initialization """ self.season = 0 self.day = 0 self.time = 0 # BACKGROUND TILES for r in range(18): for c in range(18): self.map[r, 
c] = GRASS for r in range(4, 16): self.map[r, 0] = CLIFF_W self.map[r, 17] = CLIFF_E for c in range(0, 18): self.map[0, c] = ROCK self.map[17, c] = FOREST self.map[16, c] = FORESTEDGE for i in range(0, 3): self.map[i + 1, 0] = ROCK self.map[i + 1, 4] = ROCK self.map[3, i] = ROCK self.map[i + 1, 17] = ROCK self.map[i + 1, 13] = ROCK self.map[3, 17 - i] = ROCK for i in range(5, 13): self.map[1, i] = WATER self.map[2, i] = WATER self.map[3, i] = BEACH_N self.map[15, 0] = CLIFF_SW self.map[15, 17] = CLIFF_SE self.dark_areas = [(1, 1, 4), (1, 2, 3), (1, 3, 2), (2, 1, 3), (2, 2, 2), (2, 3, 1), (3, 3, 1)] # CREATURES self.agent = Agent(self, 1, 9, 9) # PLANTS self.plants = [] for idx in range(12): plant = Plant(self, idx, rnd.randint(1, 16), rnd.randint(4, 15), rnd.randint(0, 3)) self.plants.append(plant) # Stones self.stones = [] for idx in range(15): plant = Stone(self, idx, rnd.randint(1, 16), rnd.randint(4, 15)) self.plants.append(plant) self.foods = [] def _cleanup(self): """