Example #1
from gym import Env
from gym.spaces import Discrete

class SnakeEnv(Env):
    def __init__(self):
        self.action_space = Discrete(3) # 0 = turn left, 1 = do nothing, 2 = turn right
        self.state = [0, 0, 1, 0]
        self.game = Game()
        self.reward = 0
        self.done = False

    def step(self, action):
        
        offset = (action - 1)
        translated_action = offset + self.game.snake.direction
        if translated_action < 0:
            translated_action = 3
        if translated_action > 3:
            translated_action = 0

        self.reward, self.done = self.game.run(1, translated_action)

        head = self.game.snake.snake[0]
        food = self.game.food.position
        diff = (food[0] - head[0], food[1] - head[1])

        # one-hot flags for the food's direction relative to the head (x, then y)
        self.state[0] = int(diff[0] < 0)
        self.state[2] = int(diff[0] > 0)

        self.state[1] = int(diff[1] < 0)
        self.state[3] = int(diff[1] > 0)

        return self.state, self.reward, self.done, {}

    def render(self):
        self.game.render()

    def reset(self):
        self.game.reset()
        self.state = [0, 0, 1, 0]
        self.done = False
        return self.state
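A quick sanity check for an env like this is a random rollout; a minimal sketch, assuming the Game class above is importable:

env = SnakeEnv()
state = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # random policy, just to exercise step()
    state, reward, done, info = env.step(action)
    env.render()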
Example #2
def eval_genome(genome, config):
    net = neat.nn.FeedForwardNetwork.create(genome, config)
    fitnesses = []

    for runs in range(runs_per_net):
        game = Game(20, 20)

        # Run the given simulation for up to num_steps time steps.
        fitness = 0.0
        while True:
            inputs = game.get_normalized_state()
            action = net.activate(inputs)

            # Apply action to the simulated snake
            valid = game.step(np.argmax(action))

            # Stop if the network fails to keep the snake within the boundaries or hits itself.
            # The per-run fitness is the number of pills eaten
            if not valid:
                break

            fitness = game.fitness

        fitnesses.append(fitness)

    # The genome's fitness is its worst performance across all runs.
    return min(fitnesses)
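Functions with this signature are normally passed to neat-python's evaluators rather than called directly; a minimal driver sketch, assuming a standard config file named 'config-feedforward':

import multiprocessing
import neat

config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     'config-feedforward')
pop = neat.Population(config)
pe = neat.ParallelEvaluator(multiprocessing.cpu_count(), eval_genome)
winner = pop.run(pe.evaluate, 300)  # evolve for up to 300 generations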
Example #3
 def getTrainingData(self):
     print('Getting Training Data . . .')
     data = []
     number = max(1, self.train_games // 20)  # progress-print interval; avoids x % 0
     for x in range(self.train_games):
         game = Game(x=self.x, y=self.y)
         c_data = []
         self.game = game
         snake = game.start()
         current_state = self.getState(snake)
         for _ in range(self.max_steps):
             action = self.getAction()
             length = snake.length
             done, snake, closer = game.step(action)
             if done: break
             elif not closer: continue
             else:
                 correct_output = [0, 0, 0]
                 correct_output[action + 1] = 1
                 num = 1
                 if snake.length > length: num = 3
                 for _ in range(num):
                     c_data.append([current_state, correct_output])
                 current_state = self.getState(snake)
         if snake.length > 2:
             for el in c_data:
                 data.append(el)
         if x % number == 0: print(f'{int(x/self.train_games*100)}%')
     return data
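The pairs collected here are (state, one-hot action) examples, so they can train a 3-way classifier directly; a hedged sketch, where agent and model are stand-ins for an instance of this class and any compiled Keras-style network with a 3-unit softmax output:

import numpy as np

data = agent.getTrainingData()
X = np.array([state for state, _ in data])
y = np.array([target for _, target in data])
model.fit(X, y, epochs=5)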
Example #4
def eval_genome(genome, config):
    net = neat.nn.FeedForwardNetwork.create(genome, config)

    fitnesses = []

    for runs in range(runs_per_net):

        #pygame.init()
        #screen = pygame.display.set_mode((20 * 16,20 * 16))
        #screen.fill(pygame.Color('black'))
        #pygame.display.set_caption('Snake')
        #pygame.display.flip()

        sim = Game(20, 20)

        # Run the given simulation for up to num_steps time steps.
        fitness = 0.0
        while True:
            inputs = sim.get_normalized_state()
            action = net.activate(inputs)

            # Apply action to the simulated snake
            valid = sim.step(np.argmax(action))

            # Stop if the network fails to keep the snake within the boundaries or hits itself.
            # The per-run fitness is the number of pills eaten
            if not valid:
                break

            fitness = sim.score

        fitnesses.append(fitness)

    # The genome's fitness is its worst performance across all runs.
    return min(fitnesses)
Example #5
class Play():
    def __init__(self):
        self.window = pygame.display.set_mode((270, 270))

        self.game = Game(270, 270, 9, self.window, 0, 0)
        self.model = self.load_model()

    def load_model(self):
        try:
            f = torch.load("best.pth")
        except FileNotFoundError:  # no saved model yet
            f = None
        return f

    def run(self):
        clock = pygame.time.Clock()
        while True:
            pygame.time.delay(50)
            clock.tick(10)

            self.window.fill((0, 0, 0))

            self.game.game_loop(train=False, model=self.model)

            pygame.display.update()
Example #6
def test_egg():
    
    g = Game((5, 5), (10, 10))
    # create a snake that takes almost all the space
    g.snake = [ (i, j) for i in range(5) for j in range(5) ]
    g.snake.remove( (2, 2))
    
    assert g.random_egg() == (2, 2)
Example #7
class Play():
    def __init__(self):
        self.window = pygame.display.set_mode((270, 270))

        self.game = Game(270, 270, 9, self.window, 0, 0)
        self.model = self.load_model()

    def load_model(self):
        try:
            f = torch.load("best.pth")
        except FileNotFoundError:  # no saved model yet
            f = None
        return f

    def get_average(self, arr):
        return sum(arr) / len(arr)

    def run(self):
        clock = pygame.time.Clock()

        cycles = 0
        inf_loops = 0
        prev = 0

        while True:
            pygame.time.delay(1)
            clock.tick(1000000)

            self.window.fill((0, 0, 0))

            self.game.game_loop(train=False, model=self.model)

            if self.game.reward == 0:
                cycles += 1
                inf_loops = 0

            if inf_loops == 300 and prev == self.game.points:
                self.game.restart()
                cycles += 1
                inf_loops = 0

            if prev < self.game.points:
                prev = self.game.points

            inf_loops += 1

            if cycles == 1000:
                print(
                    f'Max: {max(self.game.points_ls)}, Average: {self.get_average(self.game.points_ls)}'
                )
                exit()

            pygame.display.update()
Example #8
    def __init__(self):
        self.window = pygame.display.set_mode((270, 270))

        self.game = Game(270, 270, 9, self.window, 0, 0)

        self.model = self.load_model()

        self.cnt = 0

        if self.model is not None:
            self.game.agent.update_model(self.model)
            self.game.agent.update_tgt(self.model)
Example #9
 def build_agent():
     game = Game(
         map_size=(20, 20),
         initial_snake_length=3,
         create_observation_strategy=InverseDistanceObservationStrategy,
         create_reward_strategy=SquareExpRewardStrategy)
     return snake.agent.Agent(env=game, hidden_nodes=[18, 18])
Example #10
def test_move():

    g = Game((5, 5), (10, 10))
    # create a small snake
    g.snake = [ (1, 1), (2, 1), (3, 1) ]
    g.egg = (0, 0)

    g.move_snake(1, 0, False)
    assert g.snake == [ (2, 1), (3, 1), (4, 1) ]

    g.move_snake(1, 0, False)
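    # the head wraps from x=4 back to x=0 on the 5-wide board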
    assert g.snake == [ (3, 1), (4, 1), (0, 1) ]

    # no u-turn
    g.move_snake(-1, 0, False)
    assert g.snake == [ (3, 1), (4, 1), (0, 1) ]
Example #11
 def build_agent():
     game = Game(
         map_size=(20, 20),
         create_observation_strategy=InverseDistanceObservationStrategy,
         create_reward_strategy=SurvivalRewardStrategy)
     game = MaxTimestepsWrapper(game, max_timesteps=1000)
     return snake.agent.Agent(env=game, hidden_nodes=[18, 18])
Example #12
class Play():
    def __init__(self):
        self.window = pygame.display.set_mode((270, 270))

        self.game = Game(270, 270, 9, self.window, 0, 0)

        self.model = self.load_model()

        self.cnt = 0

        if self.model is not None:
            self.game.agent.update_model(self.model)
            self.game.agent.update_tgt(self.model)

    def save_model(self):
        print("Saving model")
        torch.save(self.game.agent.tgt, "best.pth")

    def load_model(self):
        try:
            f = torch.load("best.pth")
        except FileNotFoundError:  # no saved model yet
            f = None
        return f

    def run(self):
        clock = pygame.time.Clock()
        while True:
            pygame.time.delay(1)
            clock.tick(1000000)

            self.window.fill((0, 0, 0))

            self.game.game_loop(train=True)

            if self.game.agent.tgt_updated:
                self.cnt += 1
                print(self.cnt, " Target model updated:",
                      self.game.get_average_reward())
                self.game.agent.tgt_updated = False
                self.save_model()

            if self.cnt == 500:
                exit()

            pygame.display.update()
Example #13
 def showGame(self, model):
     game = Game(x=self.x, y=self.y, gui=True)
     self.game = game
     while True:
         snake = game.start()
         steps = self.max_steps
         current_state = self.getState(snake)
         while True:
             m = model.predict(np.array([current_state]))
             action = int(np.argmax(m[0])) - 1  # map argmax index {0,1,2} to action {-1,0,1}
             length = snake.length
             done, snake, c = game.step(action)
             if done: break
             elif snake.length > length: steps = self.max_steps
             else: current_state = self.getState(snake)
             time.sleep(.05)
             steps -= 1
             if steps == 0:
                 break
Example #14
 def test(self, model):
     print('Testing . . .')
     num = max(1, self.test_games // 20)  # progress-print interval; avoids x % 0
     lengths = []
     game = Game(x=self.x, y=self.y)
     self.game = game
     for x in range(self.test_games):
         snake = game.start()
         steps = self.max_steps
         current_state = self.getState(snake)
         while True:
             m = model.predict(np.array([current_state]))
             action = int(np.argmax(m[0])) - 1  # map argmax index {0,1,2} to action {-1,0,1}
             length = snake.length
             done, snake, _ = game.step(action)
             if done: break
             elif snake.length > length: steps = self.max_steps
             else: current_state = self.getState(snake)
             steps -= 1
             if steps == 0:
                 break
         lengths.append(snake.length)
         if x % num == 0: print(f'{int((x/self.test_games)*100)}%')
     print(f'Average: {sum(lengths)/len(lengths)}')
Example #15
class SnakeWrapper:
    """
    return the croped square_size-by-square_size after rotation and changing to one-hot and doing block-notation.
    """
    # num_classes is the number of element types that can appear on the board:
    # 4 snakes + 1 obstacle + 3 fruits + 1 empty = 9, rounded up to 10.
    num_classes = 10

    # the action space. 0-left, 1-forward, 2-right.
    action_space = gym.spaces.Discrete(3)

    # the observation space: 9x9 one-hot vectors, 9x9x10 in total.
    # the snake always looks up (the observation is a rotated crop of the board).
    observation_space = gym.spaces.Box(
        low=0,
        high=num_classes,
        shape=(9, 9, 10),
        dtype=np.int64  # np.int was removed in NumPy 1.24
    )

    def __init__(self):
        self.game = Game()
        self.square_size = 9 # the observation size
        self.timestep = 0

    def step(self, action):
        # get action as integer, move the game one step forward.
        # returns (state, reward); there is no done flag - this Snake game never ends.
        action = int_to_action[action]
        reward = self.game.step(action)

        head_pos = self.game.players[1].chain[-1]
        direction = self.game.players[1].direction
        board = self.game.board
        state = preprocess_snake_state(board, head_pos, direction, self.square_size, SnakeWrapper.num_classes)

        self.timestep += 1

        return state, reward

    def seed(self, seed=None):
        return self.game.seed(seed)

    # reset the game and return the board observation
    def reset(self):
        self.game.reset()
        self.timestep = 0
        first_state, _ = self.step(0)
        return first_state

    # print the board to the console
    def render(self, mode='human'):
        self.game.render(self.timestep)
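A short rollout, as a sketch of how this wrapper is driven (random actions stand in for a policy):

env = SnakeWrapper()
state = env.reset()
for _ in range(100):
    action = env.action_space.sample()  # uniform over {0, 1, 2}
    state, reward = env.step(action)
    env.render()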
Example #16
    def __init__(self):
        self.window = pygame.display.set_mode((270, 270))

        self.game = Game(270, 270, 9, self.window, 0, 0)
        self.model = self.load_model()
Example #17
from snake import Game, Renderer, KeyboardInput

H = 10
W = 10

game = Game(H, W)
renderer = Renderer(game)
key_input = KeyboardInput(renderer.window)  # renamed to avoid shadowing the built-in input

while True:
    renderer.render_frame()
    action = key_input.get_input()
    if action:
        game.input(action)

    game.update()
    if game.has_ended():
        renderer.close_window()
        print('THE END')
        break
Example #18
    @property
    def optimizer(self):
        return self.q_network.optimizer

    @property
    def mask(self):
        return self.q_network.mask

    def __getattr__(self, name):
        if name in self.hyper_params:
            return self.hyper_params[name]
        else:
            raise AttributeError(name)

    def __del__(self):
        self.sess.close()


if __name__ == '__main__':
    from snake import Game
    from window import Window
    number = 6
    block_size = 20
    g = Game(number=number)
    window = Window(number=number,
                    block_size=block_size,
                    expansion=1.5,
                    speed=0.2)
    dqn = DQN(game=g)
    dqn.train(window=window)
Example #19
    f_x = int(0 if not food_relative[0] else 1 * np.sign(food_relative[0])) + 1  # Select food relative x
    f_y = int(0 if not food_relative[1] else 1 * np.sign(food_relative[1])) + 1  # Select food relative y
    for i, field in enumerate(view_area.ravel()):
        if not field:  # Ignore 0=Path
            continue

        add = (FIELD_STATES ** i) * field
        discrete_index += add

    return direction, f_x, f_y, discrete_index


if __name__ == "__main__":

    game = Game(food_ammount=1, render=True)
    valid = True
    observation = game.reset()  # reset the instance we actually render, not a new Game
    score = 0
    q_table = np.load(f"{FILE}.npy", allow_pickle=True)

    os.makedirs(f"{FILE}", exist_ok=True)
    step = 0
    while valid:
        game.draw()
        surface = pygame.display.get_surface()
        pygame.image.save(surface, f"{FILE}/image_{step}.png")

        old_observation = observation
        current_q_values = get_discrete_vals(q_table, old_observation)
Example #20
# somewhat artificially, we restrict ourselves to square cells here
parser = ArgumentParser()
parser.add_argument("-W",
                    "--width",
                    default=15,
                    type=int,
                    help="horizontal size of board, in cells")
parser.add_argument("-H",
                    "--height",
                    default=15,
                    type=int,
                    help="vertical size of board, in cells")
parser.add_argument("-C",
                    "--cell",
                    default=20,
                    type=int,
                    help="cell size in pixels")
args = parser.parse_args()

# the args object holds the 3 settings
# as given on the command line
# for example
# args.width is the integer 15 if -W is not given
# args.width is the integer 20 if you pass -W 20
#
# you can always invoke the program with --help
# python main.py --help
print(args.width, args.height, args.cell)

game = Game((args.width, args.height), (args.cell, args.cell))
game.run()
Example #21
                    if event.key not in KEY_TO_ACTION_MAP:
                        continue

                    # Act on bound keys.
                    observation, reward, done, info = env.step(
                        KEY_TO_ACTION_MAP[event.key])
                    print(
                        f"Observation: {observation}\tReward: {reward}\tDone: {done}\tInfo: {info}"
                    )
                    if done:
                        env.reset()

            # Limit frame rate.
            update_clock.tick(30)

    do_game_loop()

    pygame.quit()


if __name__ == "__main__":
    from snake import Game
    from snake.observation_strategies.default_observation_strategy import DefaultObservationStrategy
    from snake.reward_strategies.default_reward_strategy import DefaultRewardStrategy

    play(
        Game(map_size=[10, 10],
             initial_snake_length=3,
             create_observation_strategy=DefaultObservationStrategy,
             create_reward_strategy=DefaultRewardStrategy))
Example #22
 def test_game(self):
     snake = Snake([[5, 4], [4, 4], [3, 4]])
     game = Game(10, snake, Direction.UP)
Example #23
from snake import Game

if __name__ == '__main__':
    snake = Game()
    snake.run()

Example #24
 def __init__(self):
     self.game = Game()
     self.square_size = 9 # the observation size
     self.timestep = 0
Example #25
class SnakeEnv(Environment):
    """ A (terribly simplified) Blackjack game implementation of an environment. """
    def __init__(self, indim, outdim):
        """ All tasks are coupled to an environment. """
        super().__init__()
        # the number of action values the environment accepts
        self.indim = indim

        # the number of sensor values the environment produces
        self.outdim = outdim

        self.game = None

        self.running = True
        self.numActions = 4
        self.allActions = [
            pygame.K_UP, pygame.K_DOWN, pygame.K_RIGHT, pygame.K_LEFT
        ]

        self.stochAction = 0.

        self.apple_distance = 0.
        self.apple_change = 0.

    def init_game(self, snake_size):
        self.game = Game()
        self.game.init_game(snake_size)
        self.running = True

    def getSensors(self):
        """ the currently visible state of the world (the    observation may be stochastic - repeated calls returning different values)
            :rtype: by default, this is assumed to be a numpy array of doubles
        """
        self.apple_distance = self.game.get_apple_distance()
        state = self.game.get_current_state()
        print(state)
        index = 9 * state["left"] + 3 * state["forward"] + state["right"]
        print(index)
        return [
            float(index),
        ]

    def performAction(self, action):
        """ perform an action on the world that changes it's internal state (maybe stochastically).
            :key action: an action that should be executed in the Environment.
            :type action: by default, this is assumed to be a numpy array of doubles
        """
        action = int(action[0])
        if self.stochAction > 0:
            if random() < self.stochAction:
                print(random())
                action = choice(list(range(len(self.allActions))))
        keydown = self.allActions[action]

        self.game.update_frame(keydown)
        if self.game.info["done"]:
            self.running = False
            return self.running

        self.apple_change = self.apple_distance - self.game.get_apple_distance()

        self.game.render()

        if action == 0:
            print("up")
        elif action == 1:
            print("down")
        elif action == 2:
            print("right")
        elif action == 3:
            print("left")

    def reset(self):
        """ Most environments will implement this optional method that allows for reinitialization.
Example #26
 def __init__(self):
     self.action_space = Discrete(3) # 0 = turn left, 1 = do nothing, 2 = turn right
     self.state = [0, 0, 1, 0]
     self.game = Game()
     self.reward = 0
     self.done = False
Example #27
        return self.env.outdim


# define action-value table
# number of states is 27:
#
#    index = 9*left + 3*forward + right, each sensor taking values in {0, 1, 2}
#
# number of actions is 4:
#
#    up, down, right, left

av_table = ActionValueTable(27, 4)
av_table.initialize(2.)

game = Game()

# define Q-learning agent
learner = Q(0.5, 0.2)
learner._setExplorer(EpsilonGreedyExplorer(0.0))
agent = LearningAgent(av_table, learner)

# define the environment
env = SnakeEnv(4, 27)
env.init_game(15)

# define the task
task = BlackjackTask(env)

# finally, define experiment
experiment = Experiment(task, agent)
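From here the experiment is typically driven by the standard PyBrain loop; a minimal sketch, where the batch size and iteration count are assumptions:

for _ in range(1000):
    experiment.doInteractions(100)
    agent.learn()
    agent.reset()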
Example #28
        print(f"{i:3}: {net.fitness}")


def select_networks(nets):
    return nets[:len(nets) // 2]


def crossover_networks(nets):
    for n1, n2 in zip(nets[:-1:2], nets[1::2]):  # chunk by two
        nets.extend(n1.SP_crossover(n2))
    return nets


if __name__ == "__main__":
    # init the snake game
    game = Game()
    # run it faaaaaaaast
    game.fps = 600

    # create the population
    population = 100
    net_args = [12, 10, 8, 4]
    networks = [Network(net_args) for _ in range(population)]

    best_net_data = (0, [])
    snake_data = []
    generation = 0
    max_generation = 50
    while True:
        generation += 1
        print(f"Generation: #{generation}")
Example #29
 def init_game(self, snake_size):
     self.game = Game()
     self.game.init_game(snake_size)
     self.running = True
Example #30
from snake import Game


if __name__ == "__main__":
    a = Game('snake', 1280, 640, 10)
    a.run()