Example No. 1
from gym import Env
from gym.spaces import Discrete

# `Game` is assumed to be provided by the surrounding project.

class SnakeEnv(Env):
    def __init__(self):
        self.action_space = Discrete(3) # 0 = turn left, 1 = do nothing, 2 = turn right
        self.state = [0, 0, 1, 0]
        self.game = Game()
        self.reward = 0
        self.done = False

    def step(self, action):
        
        # map the relative action (-1 = left, 0 = straight, +1 = right) onto the
        # absolute direction, wrapping around the four compass directions
        translated_action = (self.game.snake.direction + action - 1) % 4

        self.reward, self.done = self.game.run(1, translated_action)

        # vector from the snake's head to the food
        diff = (self.game.food.position[0] - self.game.snake.snake[0][0],
                self.game.food.position[1] - self.game.snake.snake[0][1])

        # four binary flags telling on which side of the head the food lies
        self.state[0] = int(diff[0] < 0)
        self.state[2] = int(diff[0] > 0)

        self.state[1] = int(diff[1] < 0)
        self.state[3] = int(diff[1] > 0)

        return self.state, self.reward, self.done, {}

    def render(self):
        self.game.render()

    def reset(self):
        self.game.reset()
        self.state, self.reward, self.done = [0, 0, 1, 0], 0, False
        return self.state  # gym's reset() is expected to return the initial observation
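
A minimal random-agent rollout for this environment might look like the sketch below; it assumes the surrounding project's `Game` class is importable and relies on `reset()` returning the initial observation:

env = SnakeEnv()
state = env.reset()
done = False
total_reward = 0
while not done:
    action = env.action_space.sample()  # random relative turn: left / straight / right
    state, reward, done, info = env.step(action)
    total_reward += reward
print("episode reward:", total_reward)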
Example No. 2
import gym
import numpy as np

# `Game`, `int_to_action` and `preprocess_snake_state` are assumed to be
# provided by the surrounding project.

class SnakeWrapper:
    """
    return the croped square_size-by-square_size after rotation and changing to one-hot and doing block-notation.
    """
    # num_classes is the number of different element types that can be found on the board.
    # yes I know, actually we have 9 types, but 10 is nicer. (4 snakes + 1 obstacle + 3 fruits + 1 empty = 9)
    num_classes = 10

    # the action space. 0-left, 1-forward, 2-right.
    action_space = gym.spaces.Discrete(3)

    # the observation space: 9x9 one-hot vectors, 9x9x10 in total.
    # your snake always looks up (the observation is a rotated crop of the board).
    observation_space = gym.spaces.Box(
        low=0,
        high=1,  # one-hot entries are 0 or 1
        shape=(9, 9, 10),
        dtype=np.int32
    )

    def __init__(self):
        self.game = Game()
        self.square_size = 9 # the observation size
        self.timestep = 0

    def step(self, action):
        # get action as integer, move the game one step forward
        # return tuple: state, reward, done, info. done is always False - Snake game never ends.
        action = int_to_action[action]
        reward = self.game.step(action)

        head_pos = self.game.players[1].chain[-1]
        direction = self.game.players[1].direction
        board = self.game.board
        state = preprocess_snake_state(board, head_pos, direction, self.square_size, SnakeWrapper.num_classes)

        self.timestep += 1

        return state, reward, False, {}

    def seed(self, seed=None):
        return self.game.seed(seed)

    # reset the game and return the board observation
    def reset(self):
        self.game.reset()
        self.timestep = 0
        first_state, _, _, _ = self.step(0)
        return first_state

    # print the board to the console
    def render(self, mode='human'):
        self.game.render(self.timestep)
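
A hedged usage sketch for `SnakeWrapper`: because the game never terminates, the rollout is cut off after a fixed step budget (100 steps here is an arbitrary choice):

env = SnakeWrapper()
env.seed(0)
state = env.reset()
for _ in range(100):  # the snake game never ends, so cap the episode length
    action = env.action_space.sample()  # 0-left, 1-forward, 2-right
    state, reward, done, info = env.step(action)
    env.render()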
Example No. 3
import pygame
from random import random, choice
from pybrain.rl.environments.environment import Environment

# `Game` is assumed to be provided by the surrounding project.

class SnakeEnv(Environment):
    """ A (terribly simplified) Blackjack game implementation of an environment. """
    def __init__(self, indim, outdim):
        """ All tasks are coupled to an environment. """
        super().__init__()
        # the number of action values the environment accepts
        self.indim = indim

        # the number of sensor values the environment produces
        self.outdim = outdim

        self.game = None

        self.running = True
        self.numActions = 4
        self.allActions = [
            pygame.K_UP, pygame.K_DOWN, pygame.K_RIGHT, pygame.K_LEFT
        ]

        self.stochAction = 0.

        self.apple_distance = 0.
        self.apple_change = 0.

    def init_game(self, snake_size):
        self.game = Game()
        self.game.init_game(snake_size)
        self.running = True

    def getSensors(self):
        """ the currently visible state of the world (the    observation may be stochastic - repeated calls returning different values)
            :rtype: by default, this is assumed to be a numpy array of doubles
        """
        self.apple_distance = self.game.get_apple_distance()
        state = self.game.get_current_state()
        print(state)
        # pack the three features into one base-3 index (each assumed to take values 0-2)
        index = 9 * state["left"] + 3 * state["forward"] + state["right"]
        print(index)
        return [
            float(index),
        ]

    def performAction(self, action):
        """ perform an action on the world that changes it's internal state (maybe stochastically).
            :key action: an action that should be executed in the Environment.
            :type action: by default, this is assumed to be a numpy array of doubles
        """
        action = int(action[0])
        if self.stochAction > 0:
            # with probability stochAction, replace the chosen action with a random one
            if random() < self.stochAction:
                action = choice(range(len(self.allActions)))
        keydown = self.allActions[action]

        self.game.update_frame(keydown)
        if self.game.info["done"]:
            self.running = False
            return self.running

        self.apple_change = self.apple_distance - self.game.get_apple_distance()

        self.game.render()

        if action == 0:
            print("up")
        if action == 1:
            print("down")
        if action == 2:
            print("right")
        if action == 3:
            print("left")

    def reset(self):
        """ Most environments will implement this optional method that allows for reinitialization.