Пример #1
0
class SnakeEnv(gym.Env):
    """Gym environment wrapper around ``SnakeGame``.

    The action space is ``Discrete(4)`` in ``'standard'`` mode and
    ``Discrete(3)`` in ``'pov'`` mode.  Observations are declared as a
    ``uint8`` box of shape ``(size[0], size[1], 3)`` with values in
    ``[0, 1]``; the actual observation is whatever
    ``SnakeGame.get_state()`` returns.

    An episode ends when the snake dies or when more than
    ``max_without_eating`` consecutive steps pass without an apple
    being eaten.
    """
    metadata = {'render.modes': ['human']}

    def __init__(self, size, max_without_eating=300, mode='standard'):
        """Create the environment.

        Args:
            size: Indexable with at least two entries; ``size[0]`` and
                ``size[1]`` give the first two observation dimensions.
                Also forwarded unchanged to ``SnakeGame``.
            max_without_eating: Number of consecutive apple-less steps
                tolerated before the episode is terminated.
            mode: Control scheme, ``'standard'`` (4 actions) or
                ``'pov'`` (3 actions).  Forwarded to ``SnakeGame`` as
                ``controls``.

        Raises:
            ValueError: If ``mode`` is neither ``'standard'`` nor
                ``'pov'``.
        """
        super(SnakeEnv, self).__init__()
        self.max_without_eating = max_without_eating
        self.steps_without_apple = 0

        if mode == 'standard':
            self.action_space = spaces.Discrete(4)
        elif mode == 'pov':
            self.action_space = spaces.Discrete(3)
        else:
            raise ValueError('Uknown mode: ' + str(mode))

        self.observation_space = spaces.Box(low=0,
                                            high=1,
                                            shape=(size[0], size[1], 3),
                                            dtype=np.uint8)
        self.game = SnakeGame(size, controls=mode)

    def step(self, action):
        """Advance the game by one action.

        Args:
            action: Action passed straight to ``SnakeGame.update``.

        Returns:
            A ``(state, reward, done, info)`` tuple.  ``reward`` is
            ``1`` when an apple was eaten, ``-1`` when the snake died,
            and ``0`` otherwise.  ``info`` is always an empty dict.
        """
        self.game.update(action)

        if self.game.ate_apple:
            reward = 1
            self.steps_without_apple = 0
        elif self.game.snake.dead:
            reward = -1
        else:
            reward = 0
            # Only uneventful steps count towards the starvation limit.
            self.steps_without_apple += 1

        done = self.steps_without_apple > self.max_without_eating or self.game.snake.dead

        return self.game.get_state(), reward, done, {}

    def reset(self):
        """Restart the game and return the initial state.

        Returns:
            The value of ``SnakeGame.get_state()`` after the reset.
        """
        self.game.reset()
        # ``explored`` is never created by this class, so touching it
        # unconditionally raised AttributeError on every reset.  Clear
        # it only when something else (e.g. a subclass) has set it.
        explored = getattr(self, 'explored', None)
        if explored is not None:
            self.explored = np.zeros_like(explored)
        self.steps_without_apple = 0
        return self.game.get_state()

    def render(self, mode='human', close=False):
        """Render the current frame.

        Only ``mode == 'human'`` does anything: it sleeps for 0.1 s and
        then delegates to ``SnakeGame.render``.  ``close`` is accepted
        for interface compatibility and is not used.
        """
        if mode == 'human':
            time.sleep(0.1)
            self.game.render(mode=mode)

    def seed(self, seed=None):
        """Seed NumPy's global random number generator with ``seed``."""
        np.random.seed(seed)
Пример #2
0
from snake_game import SnakeGame
from agent import Agent
from Plot import plot

if __name__ == '__main__':
    plot_scores = []
    plot_mean_scores = []
    total_score = 0
    record = 0
    agent = Agent()
    game = SnakeGame()

    #game_loop
    while True:
        # get state
        state_old = game.get_state()

        # get move
        action = agent.get_action(state_old)

        # perform move and get new state
        reward, done, score = game.play_step(action)
        state_new = game.get_state()

        # train short memory
        agent.train_short_memory(state_old, action, reward, state_new, done)

        # remember
        agent.remember(state_old, action, reward, state_new, done)

        if done:  #게임이 끝났을때