class SnakeEnv(Env):
    """Environment wrapping ``Game`` with steering relative to the snake's heading.

    Actions: 0 = turn left, 1 = do nothing, 2 = turn right.
    Observation: a list of four 0/1 flags describing where the food lies
    relative to the head: ``[dx < 0, dy < 0, dx > 0, dy > 0]``.
    """

    def __init__(self):
        self.action_space = Discrete(3)  # 0 = turn left, 1 = do nothing, 2 = turn right
        self.state = [0, 0, 1, 0]
        self.game = Game()
        self.reward = 0
        self.done = False

    def step(self, action):
        """Apply a relative action and return ``(state, reward, done, info)``.

        The relative action is shifted to -1/0/+1 and added to the snake's
        current direction; the result is wrapped back into 0..3
        (assumes directions are encoded as 0..3 -- confirm against ``Game``).
        """
        offset = action - 1
        translated_action = offset + self.game.snake.direction
        if translated_action < 0:
            translated_action = 3
        if translated_action > 3:
            translated_action = 0

        self.reward, self.done = self.game.run(1, translated_action)

        head = self.game.snake.snake[0]
        food = self.game.food.position
        dx = food[0] - head[0]
        dy = food[1] - head[1]

        self.state[0] = int(dx < 0)
        self.state[1] = int(dy < 0)
        # Compare against 0 like the other three flags; the previous `> 1`
        # reported food exactly one cell away in +x as "aligned".
        self.state[2] = int(dx > 0)
        self.state[3] = int(dy > 0)
        return self.state, self.reward, self.done, {}

    def render(self):
        """Delegate drawing to the underlying game."""
        self.game.render()

    def reset(self):
        """Reset the underlying game (returns nothing; ``self.state`` is left as is)."""
        self.game.reset()
def eval_genome(genome, config):
    """Return the genome's fitness: its worst result over ``runs_per_net`` games.

    Each game is played on a fresh 20x20 ``Game`` until ``step`` reports an
    invalid move; the per-run score is ``game.fitness`` as read after the
    last valid step (0.0 if the very first step is invalid).
    """
    network = neat.nn.FeedForwardNetwork.create(genome, config)

    def play_single_game():
        board = Game(20, 20)
        score = 0.0
        while True:
            outputs = network.activate(board.get_normalized_state())
            # The strongest output selects the action applied to the snake.
            if not board.step(np.argmax(outputs)):
                # The snake left the board or hit itself: the run is over.
                return score
            score = board.fitness

    # The genome is only as good as its worst run.
    return min([play_single_game() for _ in range(runs_per_net)])
def getTrainingData(self):
    """Play ``self.train_games`` games and collect supervised samples.

    Each sample is ``[state, one_hot_action]`` where the action (-1, 0 or 1,
    as returned by ``self.getAction()``) is one-hot encoded at index
    ``action + 1``. Only moves that ``game.step`` reports as getting closer
    are recorded; a move that makes the snake grow is recorded three times.
    A game's samples are kept only if the snake ends with length > 2.

    Returns:
        list of ``[state, one_hot_action]`` pairs.
    """
    print('Getting Training Data . . .')
    data = []
    # Progress is printed every 5% of the games. Clamp to 1 so that fewer
    # than 20 games does not make the modulo below divide by zero.
    number = max(1, int(self.train_games / 20))
    for x in range(self.train_games):
        game = Game(x=self.x, y=self.y)
        c_data = []
        self.game = game
        snake = game.start()
        current_state = self.getState(snake)
        for _ in range(self.max_steps):
            action = self.getAction()
            length = snake.length
            done, snake, closer = game.step(action)
            if done:
                break
            if closer:
                correct_output = [0, 0, 0]
                correct_output[action + 1] = 1
                repeats = 3 if snake.length > length else 1
                c_data.extend(
                    [current_state, correct_output] for _ in range(repeats)
                )
            # Refresh the observation after every non-terminal move, including
            # the discarded ones; otherwise the next sample would pair an
            # outdated state with the action taken from the new position.
            current_state = self.getState(snake)
        if snake.length > 2:
            data.extend(c_data)
        if x % number == 0:
            print(f'{int(x/self.train_games*100)}%')
    return data
def eval_genome(genome, config):
    """Evaluate a NEAT genome on ``runs_per_net`` independent 20x20 games.

    A run ends when ``sim.step`` reports an invalid move; its score is
    ``sim.score`` as read after the last valid step. The genome's fitness is
    the minimum score over all runs.
    """
    net = neat.nn.FeedForwardNetwork.create(genome, config)
    results = []
    for _ in range(runs_per_net):
        sim = Game(20, 20)
        run_score = 0.0
        still_alive = True
        while still_alive:
            observation = sim.get_normalized_state()
            chosen = np.argmax(net.activate(observation))
            # step() is falsy once the snake leaves the board or hits itself.
            still_alive = sim.step(chosen)
            if still_alive:
                run_score = sim.score
        results.append(run_score)
    # Worst-case performance is what counts.
    return min(results)
class Play():
    """Open a 270x270 window and run the game with a previously saved model."""

    def __init__(self):
        self.window = pygame.display.set_mode((270, 270))
        self.game = Game(270, 270, 9, self.window, 0, 0)
        self.model = self.load_model()

    def load_model(self):
        """Load ``best.pth`` with ``torch.load``.

        Returns:
            The loaded object, or ``None`` when it cannot be loaded.
        """
        try:
            return torch.load("best.pth")
        except FileNotFoundError:
            # No checkpoint yet: expected on a first run.
            return None
        except Exception as err:
            # Stay best-effort, but report the reason and no longer swallow
            # KeyboardInterrupt/SystemExit as the bare `except:` did.
            print(f"Could not load best.pth: {err}")
            return None

    def run(self):
        """Run the display loop forever, one game step per frame."""
        clock = pygame.time.Clock()
        while True:
            pygame.time.delay(50)
            clock.tick(10)
            self.window.fill((0, 0, 0))
            self.game.game_loop(train=False, model=self.model)
            pygame.display.update()
def test_egg():
    """With every cell but one occupied, the egg must land on the free cell."""
    free_cell = (2, 2)
    game = Game((5, 5), (10, 10))
    # Fill the whole 5x5 grid with snake segments, leaving out the centre.
    game.snake = [
        (col, row)
        for col in range(5)
        for row in range(5)
        if (col, row) != free_cell
    ]
    assert game.random_egg() == free_cell
class Play():
    """Evaluate a saved model over 1000 episodes and print max/average points."""

    def __init__(self):
        self.window = pygame.display.set_mode((270, 270))
        self.game = Game(270, 270, 9, self.window, 0, 0)
        self.model = self.load_model()

    def load_model(self):
        """Load ``best.pth`` with ``torch.load``.

        Returns:
            The loaded object, or ``None`` when it cannot be loaded.
        """
        try:
            return torch.load("best.pth")
        except FileNotFoundError:
            # No checkpoint yet: expected on a first run.
            return None
        except Exception as err:
            # Stay best-effort, but report the reason and no longer swallow
            # KeyboardInterrupt/SystemExit as the bare `except:` did.
            print(f"Could not load best.pth: {err}")
            return None

    def get_average(self, arr):
        """Return the arithmetic mean of ``arr`` as a float.

        Raises:
            ZeroDivisionError: if ``arr`` is empty.
        """
        return sum(arr, 0.0) / len(arr)

    def run(self):
        """Play until 1000 episodes have been counted, then print stats and exit.

        ``cicles`` counts episodes: it grows when ``game.reward == 0``
        (presumably the end-of-episode signal -- confirm against ``Game``) or
        when the game is restarted because 300 frames passed with the points
        still equal to ``prev``.
        """
        clock = pygame.time.Clock()
        cicles = 0
        inf_loops = 0
        prev = 0
        while True:
            pygame.time.delay(1)
            clock.tick(1000000)
            self.window.fill((0, 0, 0))
            self.game.game_loop(train=False, model=self.model)

            if self.game.reward == 0:
                cicles += 1
                inf_loops = 0

            # The snake appears to be looping without scoring: force a restart.
            if inf_loops == 300 and prev == self.game.points:
                self.game.restart()
                cicles += 1
                inf_loops = 0

            if prev < self.game.points:
                prev = self.game.points
            inf_loops += 1

            if cicles == 1000:
                print(
                    f'Max: {max(self.game.points_ls)}, Average: {self.get_average(self.game.points_ls)}'
                )
                # Same effect as exit(), without relying on the `site` module.
                raise SystemExit

            pygame.display.update()
def __init__(self):
    """Create the window and game, then seed the agent with a saved model if any."""
    self.window = pygame.display.set_mode((270, 270))
    self.game = Game(270, 270, 9, self.window, 0, 0)
    self.model = self.load_model()
    self.cnt = 0
    # Identity test: `!= None` would dispatch to the loaded object's own
    # comparison operator, which need not return a plain bool.
    if self.model is not None:
        self.game.agent.update_model(self.model)
        self.game.agent.update_tgt(self.model)
def build_agent():
    """Build an agent with two 18-node hidden layers on a 20x20 game.

    The game starts with a snake of length 3 and uses the inverse-distance
    observation strategy with the square-exp reward strategy.
    """
    game_settings = {
        "map_size": (20, 20),
        "initial_snake_length": 3,
        "create_observation_strategy": InverseDistanceObservationStrategy,
        "create_reward_strategy": SquareExpRewardStrategy,
    }
    env = Game(**game_settings)
    return snake.agent.Agent(env=env, hidden_nodes=[18, 18])
def test_move():
    """Check plain movement, wrap-around at the edge, and u-turn rejection."""
    game = Game((5, 5), (10, 10))
    # Start from a small horizontal snake, with the egg out of the way.
    game.snake = [(1, 1), (2, 1), (3, 1)]
    game.egg = (0, 0)

    scenario = (
        # (dx, dy), expected snake after the move
        ((1, 0), [(2, 1), (3, 1), (4, 1)]),   # one cell to the right
        ((1, 0), [(3, 1), (4, 1), (0, 1)]),   # wraps around to x = 0
        ((-1, 0), [(3, 1), (4, 1), (0, 1)]),  # u-turn: snake is unchanged
    )
    for (dx, dy), expected in scenario:
        game.move_snake(dx, dy, False)
        assert game.snake == expected
def build_agent():
    """Build an agent with two 18-node hidden layers on a time-limited game.

    The 20x20 game uses the inverse-distance observation strategy with the
    survival reward strategy, and is wrapped in ``MaxTimestepsWrapper`` with
    ``max_timesteps=1000``.
    """
    base_game = Game(
        map_size=(20, 20),
        create_observation_strategy=InverseDistanceObservationStrategy,
        create_reward_strategy=SurvivalRewardStrategy,
    )
    limited_env = MaxTimestepsWrapper(base_game, max_timesteps=1000)
    return snake.agent.Agent(env=limited_env, hidden_nodes=[18, 18])
class Play():
    """Training runner: trains the game's agent and checkpoints its target model."""

    def __init__(self):
        self.window = pygame.display.set_mode((270, 270))
        self.game = Game(270, 270, 9, self.window, 0, 0)
        self.model = self.load_model()
        self.cnt = 0  # number of target-model updates seen so far
        # Identity test: `!= None` would dispatch to the loaded object's own
        # comparison operator, which need not return a plain bool.
        if self.model is not None:
            self.game.agent.update_model(self.model)
            self.game.agent.update_tgt(self.model)

    def save_model(self):
        """Write the agent's target model to ``best.pth``."""
        print("Saving model")
        torch.save(self.game.agent.tgt, "best.pth")

    def load_model(self):
        """Load ``best.pth`` with ``torch.load``.

        Returns:
            The loaded object, or ``None`` when it cannot be loaded.
        """
        try:
            return torch.load("best.pth")
        except FileNotFoundError:
            # No checkpoint yet: expected on a first run.
            return None
        except Exception as err:
            # Stay best-effort, but report the reason and no longer swallow
            # KeyboardInterrupt/SystemExit as the bare `except:` did.
            print(f"Could not load best.pth: {err}")
            return None

    def run(self):
        """Train until the target model has been updated 500 times, then exit."""
        clock = pygame.time.Clock()
        while True:
            pygame.time.delay(1)
            clock.tick(1000000)
            self.window.fill((0, 0, 0))
            self.game.game_loop(train=True)

            if self.game.agent.tgt_updated:
                self.cnt += 1
                print(self.cnt, " Target model updated:", self.game.get_average_reward())
                self.game.agent.tgt_updated = False
                self.save_model()

            if self.cnt == 500:
                # Same effect as exit(), without relying on the `site` module.
                raise SystemExit

            pygame.display.update()
def showGame(self, model):
    """Let ``model`` play game after game in a GUI window; never returns.

    Each game ends when ``game.step`` reports ``done`` or when
    ``self.max_steps`` steps pass without the snake growing.

    Args:
        model: object whose ``predict`` takes a batch holding one state and
            returns one row of three scores; the index of the best score
            minus 1 is the action (-1, 0 or 1).
    """
    game = Game(x=self.x, y=self.y, gui=True)
    self.game = game
    while True:
        snake = game.start()
        steps = self.max_steps
        current_state = self.getState(snake)
        while True:
            scores = model.predict(np.array([current_state]))[0]
            # First index of the maximum, shifted to -1/0/1.
            action = int(np.argmax(scores)) - 1
            length = snake.length
            done, snake, _ = game.step(action)
            if done:
                break
            if snake.length > length:
                # The snake ate: give it a fresh step budget.
                steps = self.max_steps
            # Refresh the observation after every move. Previously it was
            # only refreshed when the snake had NOT eaten, so the prediction
            # right after eating used an outdated state.
            current_state = self.getState(snake)
            time.sleep(.05)
            steps -= 1
            if steps == 0:
                break
def test(self, model):
    """Play ``self.test_games`` games with ``model`` and print the average length.

    Each game ends when ``game.step`` reports ``done`` or when
    ``self.max_steps`` steps pass without the snake growing.

    Args:
        model: object whose ``predict`` takes a batch holding one state and
            returns one row of three scores; the index of the best score
            minus 1 is the action (-1, 0 or 1).
    """
    print('Testing . . .')
    # Progress is printed every 5% of the games. Clamp to 1 so that fewer
    # than 20 games does not make the modulo below divide by zero.
    num = max(1, int(self.test_games / 20))
    lengths = []
    game = Game(x=self.x, y=self.y)
    self.game = game
    for x in range(self.test_games):
        snake = game.start()
        steps = self.max_steps
        current_state = self.getState(snake)
        while True:
            scores = model.predict(np.array([current_state]))[0]
            # First index of the maximum, shifted to -1/0/1.
            action = int(np.argmax(scores)) - 1
            length = snake.length
            done, snake, _ = game.step(action)
            if done:
                break
            if snake.length > length:
                # The snake ate: give it a fresh step budget.
                steps = self.max_steps
            # Refresh the observation after every move. Previously it was
            # only refreshed when the snake had NOT eaten, so the prediction
            # right after eating used an outdated state.
            current_state = self.getState(snake)
            steps -= 1
            if steps == 0:
                break
        lengths.append(snake.length)
        if x % num == 0:
            print(f'{int((x/self.test_games)*100)}%')
    if not lengths:
        # No games were played: there is nothing to average.
        print('Average: n/a')
        return
    print(f'Average: {sum(lengths)/len(lengths)}')
class SnakeWrapper:
    """
    Return the cropped square_size-by-square_size observation after rotation,
    conversion to one-hot and block notation.
    """
    # num_classes is the number of different element types that can be found on the board.
    # Strictly there are 9 types (4 snakes + 1 obstacle + 3 fruits + 1 empty = 9), but 10 is nicer.
    num_classes = 10

    # The action space. 0-left, 1-forward, 2-right.
    action_space = gym.spaces.Discrete(3)

    # The observation space. 9x9 one-hot vectors, total 9x9x10.
    # Your snake always looks up (the observation is a rotated crop of the board).
    # `np.int` was only an alias of the builtin `int` and was removed in
    # NumPy 1.24; using `int` directly keeps the dtype and avoids an
    # AttributeError when this class body is executed.
    observation_space = gym.spaces.Box(
        low=0,
        high=num_classes,
        shape=(9, 9, 10),
        dtype=int
    )

    def __init__(self):
        self.game = Game()
        self.square_size = 9  # the observation size
        self.timestep = 0

    def step(self, action):
        """Advance the game one step with an integer action.

        Returns:
            ``(state, reward)`` -- a 2-tuple only; no ``done`` or ``info``
            is returned (the original note says the game never ends).
        """
        action = int_to_action[action]
        reward = self.game.step(action)

        head_pos = self.game.players[1].chain[-1]
        direction = self.game.players[1].direction
        board = self.game.board
        state = preprocess_snake_state(board, head_pos, direction, self.square_size, SnakeWrapper.num_classes)

        self.timestep += 1

        return state, reward

    def seed(self, seed=None):
        """Forward the seed to the underlying game."""
        return self.game.seed(seed)

    def reset(self):
        """Reset the game and return the board observation.

        The observation is obtained by taking one step with action 0, so
        ``timestep`` is 1 when this returns.
        """
        self.game.reset()
        self.timestep = 0
        first_state, _ = self.step(0)
        return first_state

    def render(self, mode='human'):
        """Print the board to the console."""
        self.game.render(self.timestep)
def __init__(self):
    """Create the 270x270 window, the game drawn into it, and load the model."""
    side = 270
    self.window = window = pygame.display.set_mode((side, side))
    self.game = Game(side, side, 9, window, 0, 0)
    self.model = self.load_model()
from snake import Game, Renderer, KeyboardInput H = 10 W = 10 game = Game(H, W) renderer = Renderer(game) input = KeyboardInput(renderer.window) while True: renderer.render_frame() action = input.get_input() if action: game.input(action) game.update() if game.has_ended(): renderer.close_window() print('THE END') break ''' try: change = game.changed_tiles renderer.render_frame(change) action = input.get_input() if action: game.input(action) game.update() if game.has_ended(): print('THE END')
@property def optimizer(self): return self.q_network.optimizer @property def mask(self): return self.q_network.mask def __getattr__(self, name): if name in self.hyper_params: return self.hyper_params[name] else: raise AttributeError() def __del__(self): self.sess.close() if __name__ == '__main__': from snake import Game from window import Window number = 6 block_size = 20 g = Game(number=number) window = Window(number=number, block_size=block_size, expansion=1.5, speed=0.2) dqn = DQN(game=g) dqn.train(window=window)
f_x = int(0 if not food_relative[0] else 1 * np.sign(food_relative[0])) + 1 # Select food relative x f_y = int(0 if not food_relative[1] else 1 * np.sign(food_relative[1])) + 1 # Select food relative y for i, field in enumerate(view_area.ravel()): if not field: # Ignore 0=Path continue add = (FIELD_STATES ** i) * field discrete_index += add return direction, f_x, f_y, discrete_index if __name__ == "__main__": game = Game(food_ammount=1, render=True) valid = True observation = Game().reset() score = 0 q_table = np.load(f"{FILE}.npy", allow_pickle=True) os.makedirs(f"{FILE}", exist_ok=True) step = 0 while valid: game.draw() surface = pygame.display.get_surface() pygame.image.save(surface, f"{FILE}/image_{step}.png") old_observation = observation current_q_values = get_discrete_vals(q_table, old_observation)
# We somewhat artificially restrict ourselves to square cells here.
parser = ArgumentParser()
for flags, default_value, description in (
    (("-W", "--width"), 15, "horizontal size of board, in cells"),
    (("-H", "--height"), 15, "vertical size of board, in cells"),
    (("-C", "--cell"), 20, "cell size in pixels"),
):
    parser.add_argument(*flags, default=default_value, type=int, help=description)
args = parser.parse_args()

# The args object holds the 3 settings as given on the command line.
# For example:
#   args.width is the integer 15 when -W is not given
#   args.width is the integer 20 when -W 20 is given
#
# The program can always be invoked with --help:
#   python main.py --help
print(args.width, args.height, args.cell)

board_size = (args.width, args.height)
cell_size = (args.cell, args.cell)
game = Game(board_size, cell_size)
game.run()
if event.key not in KEY_TO_ACTION_MAP: continue # Act on bound keys. observation, reward, done, info = env.step( KEY_TO_ACTION_MAP[event.key]) print( f"Observation: {observation}\tReward: {reward}\tDone: {done}\tInfo: {info}" ) if done: env.reset() # Limit frame rate. update_clock.tick(30) do_game_loop() pygame.quit() if __name__ == "__main__": from snake import Game from snake.observation_strategies.default_observation_strategy import DefaultObservationStrategy from snake.reward_strategies.default_reward_strategy import DefaultRewardStrategy play( Game(map_size=[10, 10], initial_snake_length=3, create_observation_strategy=DefaultObservationStrategy, create_reward_strategy=DefaultRewardStrategy))
def test_game(self):
    """Smoke test: a Game can be constructed from a three-segment snake."""
    segments = [[5, 4], [4, 4], [3, 4]]
    snake = Snake(segments)
    # No assertions: the test only checks that construction does not raise.
    game = Game(10, snake, Direction.UP)
from snake import Game

# Entry point: build a default Game and hand control to its run loop.
# (Replaces the stock PyCharm sample-script comments.)
if __name__ == '__main__':
    game = Game()
    game.run()
def __init__(self):
    """Create the wrapped game and initialise the step counter."""
    self.game = Game()
    # square_size is the side length of the observation; timestep counts steps.
    self.square_size, self.timestep = 9, 0
class SnakeEnv(Environment): """ A (terribly simplified) Blackjack game implementation of an environment. """ def __init__(self, indim, outdim): super().__init__() """ All tasks are coupled to an environment. """ # the number of action values the environment accepts self.indim = indim # the number of sensor values the environment produces self.outdim = outdim self.game = None self.running = True self.numActions = 4 self.allActions = [ pygame.K_UP, pygame.K_DOWN, pygame.K_RIGHT, pygame.K_LEFT ] self.stochAction = 0. self.apple_distance = 0. self.apple_change = 0. def init_game(self, snake_size): self.game = Game() self.game.init_game(snake_size) self.running = True def getSensors(self): """ the currently visible state of the world (the observation may be stochastic - repeated calls returning different values) :rtype: by default, this is assumed to be a numpy array of doubles """ self.apple_distance = self.game.get_apple_distance() state = self.game.get_current_state() print(state) index = 9 * state["left"] + 3 * state["forward"] + state["right"] print(index) return [ float(index), ] def performAction(self, action): """ perform an action on the world that changes it's internal state (maybe stochastically). :key action: an action that should be executed in the Environment. :type action: by default, this is assumed to be a numpy array of doubles """ action = int(action[0]) if self.stochAction > 0: if random() < self.stochAction: print(random()) action = choice(list(range(len(self.allActions)))) keydown = self.allActions[action] self.game.update_frame(keydown) if self.game.info["done"]: self.running = False return self.running self.apple_change = self.apple_distance - self.game.get_apple_distance( ) self.game.render() if action == 0: print("up") if action == 1: print("down") if action == 2: print("right") if action == 3: print("left") def reset(self): """ Most environments will implement this optional method that allows for reinitialization.
def __init__(self):
    """Set up the 3-action space, the initial observation and a fresh game."""
    # Actions: 0 = turn left, 1 = do nothing, 2 = turn right.
    self.action_space = Discrete(3)
    initial_flags = [0, 0, 1, 0]
    self.state = initial_flags
    self.game = Game()
    self.reward, self.done = 0, False
return self.env.outdim # define action-value table # number of states is: # # current value: 1-21 # # number of actions: # # Stand=0, Hit=1 av_table = ActionValueTable(27, 4) av_table.initialize(2.) game = Game() # define Q-learning agent learner = Q(0.5, 0.2) learner._setExplorer(EpsilonGreedyExplorer(0.0)) agent = LearningAgent(av_table, learner) # define the environment env = SnakeEnv(4, 27) env.init_game(15) # define the task task = BlackjackTask(env) # finally, define experiment experiment = Experiment(task, agent)
print(f"{i:3}: {net.fitness}") def select_networks(nets): return nets[:len(nets) // 2] def crossover_networks(nets): for n1, n2 in zip(nets[:-1:2], nets[1::2]): # chunk by two nets.extend(n1.SP_crossover(n2)) return nets if __name__ == "__main__": # init the snake game game = Game() # run it faaaaaaaast game.fps = 600 # create the population population = 100 net_args = [12, 10, 8, 4] networks = [Network(net_args) for _ in range(population)] best_net_data = (0, []) snake_data = [] generation = 0 max_generation = 50 while True: generation += 1 print(f"Generation: #{generation}")
def init_game(self, snake_size):
    """Replace the current game with a new one initialised to ``snake_size``."""
    self.game = new_game = Game()
    new_game.init_game(snake_size)
    # Mark the environment as running again.
    self.running = True
from snake import Game

# Entry point: create the game with the fixed launch arguments and run it.
if __name__ == "__main__":
    launch_args = ('snake', 1280, 640, 10)
    game = Game(*launch_args)
    game.run()