class SnakeEnv(Env):
    """Gym-style environment driving ``Game`` with relative (turn) actions.

    Actions are relative to the snake's current heading:
    0 = turn left, 1 = do nothing (keep heading), 2 = turn right.

    The observation is a list of four 0/1 flags describing the offset from
    the snake's head (``game.snake.snake[0]``) to the food
    (``game.food.position``)::

        [d0 < 0, d1 < 0, d0 > 0, d1 > 0]

    where ``d0`` / ``d1`` are the differences along the first / second
    coordinate of the position tuples.

    Note: ``self.state`` is a single list that is updated in place and
    returned from every ``step``/``reset`` call; callers that store
    observations (e.g. in a replay buffer) should copy it.
    """

    # Number of absolute headings; the original wrap-around logic
    # (-1 -> 3, 4 -> 0) implies headings are the integers 0..3.
    _NUM_DIRECTIONS = 4

    def __init__(self):
        # 0 = turn left, 1 = do nothing, 2 = turn right
        self.action_space = Discrete(3)
        self.state = [0, 0, 1, 0]
        self.game = Game()
        self.reward = 0
        self.done = False

    def _update_state(self):
        """Recompute the four food-direction flags in ``self.state`` in place."""
        head = self.game.snake.snake[0]
        food = self.game.food.position
        d0 = food[0] - head[0]
        d1 = food[1] - head[1]
        self.state[0] = int(d0 < 0)
        self.state[1] = int(d1 < 0)
        # Fixed: this flag used to test ``d0 > 1``, so food exactly one unit
        # away on the positive side of the first coordinate was not reported.
        self.state[2] = int(d0 > 0)
        self.state[3] = int(d1 > 0)

    def step(self, action):
        """Apply a relative action and advance the game.

        Args:
            action: 0 (turn left), 1 (keep heading) or 2 (turn right).

        Returns:
            Tuple ``(state, reward, done, info)`` where ``info`` is always
            an empty dict and ``reward``/``done`` come from ``Game.run``.
        """
        # ``action - 1`` maps the action to an offset in {-1, 0, +1}; the
        # modulo wraps the resulting absolute heading back into 0..3.
        heading = (self.game.snake.direction + action - 1) % self._NUM_DIRECTIONS
        self.reward, self.done = self.game.run(1, heading)
        self._update_state()
        return self.state, self.reward, self.done, {}

    def render(self):
        """Delegate rendering to the underlying game."""
        self.game.render()

    def reset(self):
        """Reset the game and the per-episode bookkeeping.

        Returns:
            The observation for the freshly reset game (the Gym ``reset``
            contract). Previously nothing was returned and ``reward``,
            ``done`` and ``state`` kept their values from the last episode.
        """
        self.game.reset()
        self.reward = 0
        self.done = False
        # NOTE(review): assumes ``game.reset()`` leaves ``snake`` and ``food``
        # populated, as they are after ``game.run`` in ``step`` - confirm.
        self._update_state()
        return self.state
class SnakeWrapper:
    """Gym-like wrapper around ``Game`` for a single controlled snake.

    Observations are the cropped square_size-by-square_size view produced by
    ``preprocess_snake_state`` (after rotation, conversion to one-hot and
    block-notation).

    Unlike ``gym.Env.step``, ``step`` here returns only ``(state, reward)``.
    """

    # num_classes is the number of different element types that can be found on the board.
    # yes I know, actually we have 9 types, but 10 is nicer. (4 snakes + 1 obstacle + 3 fruits + 1 empty = 9)
    num_classes = 10

    # the action space. 0-left, 1-forward, 2-right.
    action_space = gym.spaces.Discrete(3)

    # the observation space. 9x9 one hot vectors, total 9x9x10.
    # your snake always look up (the observation is a rotated crop of the board).
    # ``np.int`` was only an alias of the builtin ``int``; it was deprecated in
    # NumPy 1.20 and removed in 1.24, so the builtin is used directly.
    observation_space = gym.spaces.Box(
        low=0,
        high=num_classes,
        shape=(9, 9, 10),
        dtype=int,
    )

    def __init__(self):
        self.game = Game()
        self.square_size = 9  # the observation size
        self.timestep = 0

    def step(self, action):
        """Advance the game by one step.

        Args:
            action: integer action index, translated through ``int_to_action``.

        Returns:
            Tuple ``(state, reward)``. No ``done``/``info`` values are
            returned; the original comment notes that the snake game never
            ends, i.e. ``done`` would always be False.
        """
        action = int_to_action[action]
        reward = self.game.step(action)

        # NOTE(review): presumably players[1] is the agent-controlled snake and
        # the last element of its chain is the head - confirm against Game.
        player = self.game.players[1]
        head_pos = player.chain[-1]
        direction = player.direction
        board = self.game.board
        state = preprocess_snake_state(
            board, head_pos, direction, self.square_size, SnakeWrapper.num_classes
        )

        self.timestep += 1
        return state, reward

    def seed(self, seed=None):
        """Forward the seed to the underlying game and return its result."""
        return self.game.seed(seed)

    def reset(self):
        """Reset the game and return the board observation.

        The first observation is obtained by taking one game step with
        action 0, so ``timestep`` is 1 (not 0) when this method returns.
        """
        self.game.reset()
        self.timestep = 0
        first_state, _ = self.step(0)
        return first_state

    def render(self, mode='human'):
        """Print the board to the console (``mode`` is accepted but unused)."""
        self.game.render(self.timestep)
class SnakeEnv(Environment): """ A (terribly simplified) Blackjack game implementation of an environment. """ def __init__(self, indim, outdim): super().__init__() """ All tasks are coupled to an environment. """ # the number of action values the environment accepts self.indim = indim # the number of sensor values the environment produces self.outdim = outdim self.game = None self.running = True self.numActions = 4 self.allActions = [ pygame.K_UP, pygame.K_DOWN, pygame.K_RIGHT, pygame.K_LEFT ] self.stochAction = 0. self.apple_distance = 0. self.apple_change = 0. def init_game(self, snake_size): self.game = Game() self.game.init_game(snake_size) self.running = True def getSensors(self): """ the currently visible state of the world (the observation may be stochastic - repeated calls returning different values) :rtype: by default, this is assumed to be a numpy array of doubles """ self.apple_distance = self.game.get_apple_distance() state = self.game.get_current_state() print(state) index = 9 * state["left"] + 3 * state["forward"] + state["right"] print(index) return [ float(index), ] def performAction(self, action): """ perform an action on the world that changes it's internal state (maybe stochastically). :key action: an action that should be executed in the Environment. :type action: by default, this is assumed to be a numpy array of doubles """ action = int(action[0]) if self.stochAction > 0: if random() < self.stochAction: print(random()) action = choice(list(range(len(self.allActions)))) keydown = self.allActions[action] self.game.update_frame(keydown) if self.game.info["done"]: self.running = False return self.running self.apple_change = self.apple_distance - self.game.get_apple_distance( ) self.game.render() if action == 0: print("up") if action == 1: print("down") if action == 2: print("right") if action == 3: print("left") def reset(self): """ Most environments will implement this optional method that allows for reinitialization.