def test_model(self, model):
    steps_arr = []
    scores_arr = []
    for _ in range(self.test_games):
        steps = 0
        game_memory = []
        game = SnakeGame()
        _, score, snake, food = game.start()
        prev_observation = self.generate_observation(snake, food)
        for _ in range(self.goal_steps):
            predictions = []
            for action in range(-1, 2):
                predictions.append(
                    model.predict(
                        self.add_action_to_observation(
                            prev_observation, action).reshape(-1, 5, 1)))
            action = np.argmax(np.array(predictions))
            game_action = self.get_game_action(snake, action - 1)
            done, score, snake, food = game.step(game_action)
            game_memory.append([prev_observation, action])
            if done:
                # print('-----')
                # print(steps)
                # print(snake)
                # print(food)
                # print(prev_observation)
                # print(predictions)
                break
            else:
                prev_observation = self.generate_observation(snake, food)
                steps += 1
        steps_arr.append(steps)
        scores_arr.append(score)
def test_model(self, model):
    steps_arr = []
    for _ in range(self.test_games):
        steps = 0
        game_memory = []
        game = SnakeGame()
        _, _, snake, _ = game.start()
        prev_observation = self.generate_observation(snake)
        for _ in range(self.goal_steps):
            predictions = []
            for action in range(-1, 2):
                predictions.append(
                    model.predict(
                        self.add_action_to_observation(
                            prev_observation, action).reshape(-1, 4, 1)))
            action = np.argmax(np.array(predictions))
            game_action = self.get_game_action(snake, action - 1)
            done, _, snake, _ = game.step(game_action)
            game_memory.append([prev_observation, action])
            if done:
                break
            else:
                prev_observation = self.generate_observation(snake)
                steps += 1
        steps_arr.append(steps)
    print('Average steps:', mean(steps_arr))
    print(Counter(steps_arr))
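# add_action_to_observation isn't defined in these excerpts. A minimal
# sketch, assuming the observation is a flat NumPy vector and the candidate
# action is simply prepended to it — which would match the reshapes to
# 5 (= 1 action + 4 features) and 4 (= 1 + 3) seen above:

import numpy as np

def add_action_to_observation(observation, action):
    # Prepend the action so the network scores (action, observation) pairs.
    return np.append([action], observation)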
class Agent:
    def __init__(self, shape=(10, 12, 4), size=(20, 20)):
        self.NeuralNetwork = NeuralNetwork(shape)
        self.row, self.col = size
        self.game = SnakeGame(self.row, self.col)

    # performs a move in the game according to the agent's recommendation
    def move(self):
        # predicts best move with neural net
        X = self.game.information()
        y = self.NeuralNetwork.predict(X)
        # gets moves in rank order
        move_args = np.argsort(y)
        moves = list(Direction)
        # if the selected move turns 180 degrees, pick the next best move
        selected_move = moves[move_args[-1]]
        if np.linalg.norm(np.array(selected_move.value) +
                          np.array(self.game.direction.value)) == 0:
            selected_move = moves[move_args[-2]]
        self.game.direction = selected_move
        self.game.step()
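# Direction isn't defined in this excerpt. The 180-degree check above relies
# on opposite directions having unit vectors that sum to zero, so a
# compatible definition (an assumption, not the project's actual code) is:

from enum import Enum

class Direction(Enum):
    UP = (0, -1)
    DOWN = (0, 1)
    LEFT = (-1, 0)
    RIGHT = (1, 0)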
def initial_population(self):
    training_data = []
    for _ in range(self.initial_games):
        game = SnakeGame()
        _, prev_score, snake, food, obstacles = game.start()
        # returns generate_observation from the snake game.
        prev_observation = self.generate_observation(snake, food, obstacles)
        prev_food_distance = self.get_food_distance(snake, food)
        for _ in range(self.goal_steps):
            action, game_action = self.generate_action(snake)
            done, score, snake, food, obstacles = game.step(game_action)
            # print(training_data)
            # input("test")
            if done:
                training_data.append([
                    self.add_action_to_observation(prev_observation, action), -1])
                break
            else:
                food_distance = self.get_food_distance(snake, food)
                if score > prev_score or food_distance < prev_food_distance:
                    # did you get closer to the objective?
                    # label as good decision.
                    training_data.append([
                        self.add_action_to_observation(prev_observation, action), 1])
                else:
                    # label as bad decision.
                    training_data.append([
                        self.add_action_to_observation(prev_observation, action), 0])
                prev_observation = self.generate_observation(snake, food, obstacles)
                prev_food_distance = food_distance
    # Later we will use this 1/0/-1 label to provide estimates for each
    # possible decision.
    return training_data
def initial_population(self):
    training_data = []
    for _ in range(self.initial_games):
        game = SnakeGame()
        _, prev_score, snake, food = game.start()
        prev_observation = self.generate_observation(snake, food)
        prev_food_distance = self.get_food_distance(snake, food)
        for _ in range(self.goal_steps):
            action, game_action = self.generate_action(snake)
            done, score, snake, food = game.step(game_action)
            if done:
                training_data.append([
                    self.add_action_to_observation(prev_observation, action), -1
                ])
                break
            else:
                food_distance = self.get_food_distance(snake, food)
                if score > prev_score or food_distance < prev_food_distance:
                    training_data.append([
                        self.add_action_to_observation(prev_observation, action), 1
                    ])
                else:
                    training_data.append([
                        self.add_action_to_observation(prev_observation, action), 0
                    ])
                prev_observation = self.generate_observation(snake, food)
                prev_food_distance = food_distance
    return training_data
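# get_food_distance isn't shown in these excerpts. A minimal sketch, assuming
# the snake's head is the first element of `snake` and that head and food are
# both (x, y) pairs; Euclidean distance is one reasonable choice:

import numpy as np

def get_food_distance(snake, food):
    return np.linalg.norm(np.array(food) - np.array(snake[0]))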
def train(num_episodes, episode_length, gamma=0.7):
    for episode_no in range(num_episodes):
        print(f"\rEpisode {episode_no} out of {num_episodes}", end="\r")
        games = [SnakeGame(**opts) for i in range(batch_size)]
        memories = [Memory() for i in range(batch_size)]
        for _ in range(episode_length):
            observations = numpy.array([game.get_board() for game in games])
            actions = choose_action(snake_model, observations, single=False)
            for game, action in zip(games, actions):
                game.tick(game_actions[action])
            for memory, observation, action, game in zip(
                memories, observations, actions, games
            ):
                memory.add_to_memory(observation, action, reward(game))
            for i in range(batch_size):
                if games[i].game_over:
                    games[i] = SnakeGame(**opts)
        batch_memory = aggregate_memories(memories)
        train_step(
            snake_model,
            optimizer,
            observations=numpy.stack(batch_memory.observations, 0),
            actions=numpy.array(batch_memory.actions),
            discounted_rewards=discount_rewards(
                batch_memory.rewards, GAME_OVER_REWARD, gamma
            ),
        )
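# discount_rewards isn't shown here. A minimal sketch, assuming the
# GAME_OVER_REWARD sentinel marks episode boundaries within the aggregated
# reward stream, so the running return resets whenever one appears:

import numpy

def discount_rewards(rewards, game_over_reward, gamma):
    discounted = numpy.zeros(len(rewards), dtype=numpy.float32)
    running = 0.0
    for t in reversed(range(len(rewards))):
        if rewards[t] == game_over_reward:
            running = 0.0  # don't propagate returns across episode boundaries
        running = running * gamma + rewards[t]
        discounted[t] = running
    return discounted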
def test_model(self, model):
    steps_arr = []
    scores_arr = []
    count = 0
    solved = 0
    print("Testing in progress")
    for i in range(self.test_games):
        steps = 0
        game_memory = []
        game = SnakeGame()
        if self.game_type == 'maze':
            game = MazeGame()
        _, score, snake, food = game.start()
        prev_observation = self.generate_observation(snake, food)
        for _ in range(self.goal_steps):
            predictions = []
            for action in range(-1, 2):
                predictions.append(
                    model.predict(
                        self.add_action_to_observation(
                            prev_observation, action).reshape(-1, 5, 1)))
            action = np.argmax(np.array(predictions))
            game_action = self.get_game_action(snake, action - 1)
            done, score, snake, food = game.step(game_action)
            game_memory.append([prev_observation, action])
            if done:
                self.progress(i + 1, self.test_games)
                if self.game_type == 'maze' and score == 1:
                    solved += 1
                count += 1
                if False:  # if count % 100 == 0:
                    print('-----')
                    print('id: ' + str(count))
                    print(steps)
                    print(snake)
                    print(food)
                    print(prev_observation)
                    print(predictions)
                break
            else:
                prev_observation = self.generate_observation(snake, food)
                steps += 1
        steps_arr.append(steps)
        scores_arr.append(score)
    print("\n\n")
    print('Average steps:', mean(steps_arr))
    # print(Counter(steps_arr))
    print('Average score:', mean(scores_arr))
    # print(Counter(scores_arr))
    scores_arr.sort()
    print('Lowest score:', scores_arr[0])
    print('Highest score:', scores_arr[-1])
    if self.game_type == 'maze':
        print('Total solved mazes:', solved)
    with open('steps_arr', 'wb') as file:
        pickle.dump(steps_arr, file)
    with open('scores_arr', 'wb') as file:
        pickle.dump(scores_arr, file)
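# progress isn't shown in these excerpts. A minimal console progress bar with
# the same (current, total) call shape — an assumption:

import sys

def progress(current, total, width=40):
    filled = int(width * current / total)
    bar = '#' * filled + '-' * (width - filled)
    sys.stdout.write(f'\r[{bar}] {current}/{total}')
    sys.stdout.flush()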
def visualise_game(self, model):
    game = SnakeGame(gui=True)
    _, _, snake, _ = game.start()
    prev_observation = self.generate_observation(snake)
    for _ in range(self.goal_steps):
        predictions = []
        for action in range(-1, 2):
            predictions.append(
                model.predict(
                    self.add_action_to_observation(
                        prev_observation, action).reshape(-1, 4, 1)))
        action = np.argmax(np.array(predictions))
        game_action = self.get_game_action(snake, action - 1)
        done, _, snake, _ = game.step(game_action)
        if done:
            break
        else:
            prev_observation = self.generate_observation(snake)
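# get_game_action isn't shown here either. It maps a relative action
# (-1 = turn left, 0 = keep straight, 1 = turn right) onto an absolute game
# move. A minimal sketch, assuming the heading can be derived from the first
# two snake segments; which 90-degree rotation counts as "left" depends on
# the game's coordinate convention, and the real project may translate the
# resulting vector into a key code:

import numpy as np

def get_game_action(snake, action):
    heading = np.array(snake[0]) - np.array(snake[1])  # head minus neck
    if action == -1:
        heading = np.array([-heading[1], heading[0]])  # rotate 90 degrees one way
    elif action == 1:
        heading = np.array([heading[1], -heading[0]])  # rotate the other way
    return heading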
def start_game(board_size, delay_time=250):
    game = SnakeGame(board_size)
    win, ren = init_sdl2(game, 'snek')
    game_thread = Thread(target=game_loop, args=(game, delay_time))
    game_thread.start()
    while game_thread.is_alive():
        show_game(game, ren)
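# game_loop isn't shown in these excerpts. A minimal sketch, assuming the
# game exposes tick() and a game_over flag and that delay_time is given in
# milliseconds:

import time

def game_loop(game, delay_time):
    while not game.game_over:
        game.tick()
        time.sleep(delay_time / 1000.0)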
def initial_population(self):
    training_data = []
    for _ in range(self.initial_games):
        game = SnakeGame()
        _, _, snake, _ = game.start()
        prev_observation = self.generate_observation(snake)
        for _ in range(self.goal_steps):
            action, game_action = self.generate_action(snake)
            done, _, snake, _ = game.step(game_action)
            if done:
                training_data.append(
                    [self.add_action_to_observation(prev_observation, action), 0])
                break
            else:
                training_data.append(
                    [self.add_action_to_observation(prev_observation, action), 1])
                prev_observation = self.generate_observation(snake)
    print(len(training_data))
    return training_data
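# generate_action isn't shown here. During data collection the action is
# chosen at random; a minimal sketch returning the (relative, absolute) pair
# the snippets above unpack, reusing the get_game_action sketch earlier:

from random import randint

def generate_action(snake):
    action = randint(0, 2) - 1  # -1, 0 or 1: left, straight, right
    return action, get_game_action(snake, action)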
def generate_training_data(self, initial_games, goal_steps):
    """Generate training data for the neural network based on random actions.

    Parameters
    ----------
    initial_games : int
        number of games for the training
    goal_steps : int
        max number of steps in a game

    Returns
    -------
    list
        list containing the input data and the targets
    """
    training_data = []
    from tqdm import tqdm
    for i in tqdm(range(initial_games)):
        state = SnakeGame()
        prev_food_distance = self.get_distance(state.snake[0], state.food)
        prev_score = state.score
        prev_observation = self.get_observation(state)
        for j in range(goal_steps):
            # Get action
            action = self.generate_action(state)
            # Update state (the game object is callable and returns the
            # state after the action is applied)
            state = state(action)
            # We will now evaluate the performed moves, using a target system
            # where -1 means a bad move, 0 means a neutral move and 1 means a
            # good move.
            # A move is bad if the snake crashes.
            if state.done:
                target = -1
                training_data.append(
                    self.pack_data(prev_observation, action, target))
                break
            else:
                food_distance = self.get_distance(state.snake[0], state.food)
                # A move is considered good if the snake gets closer to the
                # food or eats the food.
                if state.score > prev_score or food_distance < prev_food_distance:
                    target = 1
                else:
                    target = 0
                training_data.append(
                    self.pack_data(prev_observation, action, target))
                prev_observation = self.get_observation(state)
                prev_food_distance = food_distance
                prev_score = state.score
    return training_data
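# pack_data isn't shown here. Given how the training data is consumed in the
# torch snippet at the end of this section (inputs reshaped to (-1, 5),
# targets to (-1, 1)), a minimal sketch that prepends the action to the
# observation — an assumption:

import numpy as np

def pack_data(observation, action, target):
    return [np.append([action], observation), target]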
def test_model(self, model):
    steps_arr = []
    scores_arr = []
    for _ in range(self.test_games):
        steps = 0
        game_memory = []
        game = SnakeGame()
        _, score, snake, food, obstacles = game.start()
        prev_observation = self.generate_observation(snake, food, obstacles)
        for _ in range(self.goal_steps):
            predictions = []
            # iterate through each possible decision
            for action in range(-1, 2):
                predictions.append(
                    model.predict(
                        self.add_action_to_observation(
                            prev_observation, action).reshape(-1, 5, 1)))
            # choose the decision with the highest predicted value
            action = np.argmax(np.array(predictions))
            # perform the action in the game
            game_action = self.get_game_action(snake, action - 1)
            done, score, snake, food, obstacles = game.step(game_action)
            game_memory.append([prev_observation, action])
            if done:
                print('-----')
                print(steps)
                print(snake)
                print(food)
                print(prev_observation)
                print(predictions)
                break
            else:
                prev_observation = self.generate_observation(snake, food, obstacles)
                steps += 1
        steps_arr.append(steps)
        scores_arr.append(score)
    print('Average steps:', mean(steps_arr))
    print(Counter(steps_arr))
    print('Average score:', mean(scores_arr))
    print(Counter(scores_arr))
def train_agent():
    plot_scores = []
    plot_mean_scores = []
    total_score = 0
    record = 0
    agent = Agent()
    game = SnakeGame()
    while True:
        # get old state
        state_old = agent.get_state(game)

        # get move
        final_move = agent.get_action(state_old)

        # perform move and get new state
        reward, done, score = game.play_step(final_move)
        state_new = agent.get_state(game)

        # train short memory
        agent.train_short_memory(state_old, final_move, reward, state_new, done)

        # remember
        agent.remember(state_old, final_move, reward, state_new, done)

        if done:
            game.reset()
            agent.num_games += 1
            agent.train_long_memory()
            if score > record:
                record = score
                agent.model.save()
            print(f'Game: {agent.num_games}, Score: {score}, Record: {record}')
            plot_scores.append(score)
            total_score += score
            mean_score = total_score / agent.num_games
            plot_mean_scores.append(mean_score)
            # The plotting helper can't be named plot_scores here: that name
            # is shadowed by the list above, and calling the list would raise
            # a TypeError. plot() is the assumed external helper.
            plot(plot_scores, plot_mean_scores)
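# A minimal sketch of the assumed plot() helper, using matplotlib; the
# project's actual helper may differ:

import matplotlib.pyplot as plt

def plot(scores, mean_scores):
    plt.clf()
    plt.title('Training...')
    plt.xlabel('Number of games')
    plt.ylabel('Score')
    plt.plot(scores, label='score')
    plt.plot(mean_scores, label='mean score')
    plt.legend()
    plt.pause(0.1)  # brief pause so the figure refreshes mid-training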
def main():
    g = SnakeGame()
    try:
        g.run()
    finally:
        g.quit_game()
class SnakeGameDemoWidget(QtWidgets.QWidget):
    """SnakeGame"""

    def __init__(self):
        super(SnakeGameDemoWidget, self).__init__()
        self.resize(QtCore.QSize(300, 300))
        self.setWindowTitle("SnakeGame")
        self.snake_game = SnakeGame()
        self.snake_game.deterministic_food = True
        self.snake_game.food_positions = [(6, 6), (2, 15), (17, 3)]
        self.snake_game.rect = self.rect()
        self.snake_game.width = self.width()
        self.snake_game.height = self.height()
        self.snake_game.setup()
        self.tick_timer = QTimer()
        self.tick_timer.setInterval(100)
        self.tick_timer.timeout.connect(self.tick)

    def showEvent(self, event):
        self.snake_game.rect = self.rect()
        self.tick_timer.start()

    def resizeEvent(self, event):
        self.snake_game.rect = self.rect()

    def closeEvent(self, event):
        self.tick_timer.stop()

    def tick(self):
        self.snake_game.tick()
        self.update()

    def paintEvent(self, event):
        painter = QtGui.QPainter(self)
        painter.setRenderHint(QtGui.QPainter.Antialiasing, True)
        self.snake_game.paint(painter)

    def keyPressEvent(self, event):
        self.snake_game.key_pressed(event.key())

    def sizeHint(self):
        return QtCore.QSize(300, 300)
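# A minimal usage sketch for the widget above, assuming PyQt5-style imports
# (the excerpt itself only shows QtCore/QtGui/QtWidgets usage):

import sys
from PyQt5 import QtWidgets

if __name__ == '__main__':
    app = QtWidgets.QApplication(sys.argv)
    widget = SnakeGameDemoWidget()
    widget.show()
    sys.exit(app.exec_())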
def initial_population(self):
    training_data = []
    print("Generating initial games")
    for i in range(self.initial_games):
        self.progress(i + 1, self.initial_games)
        game = SnakeGame()
        if self.game_type == 'maze':
            game = MazeGame()
        _, prev_score, snake, food = game.start()
        prev_observation = self.generate_observation(snake, food)
        prev_food_distance = self.get_food_distance(snake, food)
        for _ in range(self.goal_steps):
            action, game_action = self.generate_action(snake)
            done, score, snake, food = game.step(game_action)
            if done:
                training_data.append([
                    self.add_action_to_observation(prev_observation, action), -1
                ])  # Snake is dead
                break
            else:
                food_distance = self.get_food_distance(snake, food)
                if score > prev_score or food_distance < prev_food_distance:
                    training_data.append([
                        self.add_action_to_observation(prev_observation, action), 1
                    ])  # The last move was efficient
                else:
                    training_data.append([
                        self.add_action_to_observation(prev_observation, action), 0
                    ])  # The last move was not efficient
                prev_observation = self.generate_observation(snake, food)
                prev_food_distance = food_distance
    with open('init_pop_gen', 'wb') as file:
        pickle.dump(training_data, file)
    return training_data
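# The pickled training data can be reloaded later to skip regeneration:

import pickle

with open('init_pop_gen', 'rb') as file:
    training_data = pickle.load(file)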
def train(num_episodes):
    max_moves_without_fruit = 15
    for i in range(num_episodes):
        game = SnakeGame(width, height, num_fruit=3)
        num_moves_without_fruit = 0
        while not game.game_over:
            observation = numpy.copy(game.board)
            action = choose_action(snake_model, observation)
            game.tick(game_actions[action])
            # next_observation = numpy.copy(game.board)
            num_moves_without_fruit += 1
            if game.game_over:
                reward = -10
            elif game.just_ate_fruit:
                reward = 1
            elif num_moves_without_fruit > max_moves_without_fruit:
                reward = -1
                num_moves_without_fruit = 0
            else:
                reward = 0
            memory.add_to_memory(observation, action, reward)
            if game.game_over:
                # total_reward = sum(memory.rewards)
                total_observation = numpy.stack(memory.observations, 0)
                total_action = numpy.array(memory.actions)
                total_rewards = discount_rewards(memory.rewards, gamma)
                train_step(snake_model, optimizer, total_observation,
                           total_action, total_rewards)
                memory.clear()
                break
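# choose_action isn't shown in these excerpts. Since train_step consumes
# discounted rewards in policy-gradient style, a minimal sketch that samples
# from the model's action distribution — an assumption:

import numpy
import tensorflow as tf

def choose_action(model, observation, single=True):
    observations = numpy.expand_dims(observation, 0) if single else observation
    logits = model.predict(observations.astype(numpy.float32))
    actions = tf.random.categorical(logits, num_samples=1).numpy().flatten()
    return actions[0] if single else actions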
        num_games += 1
        lengths += game.score()
        game.begin()

        # Save state every few runs
        if i % 5 == 0:
            agent.save_nn(path)
        eps = max(eps * decay, eps_min)

    if num_games > 0:
        print(' - games:', num_games,
              ', avg score:', round(lengths / num_games, 2),
              ', games won:', games_won)
    agent.save_nn(path)


# Initialize game
game = SnakeGame(SIZE, SIZE)

# Initialize Neural Net
nn = NeuralNetwork(inp_dim=[VIEW * VIEW + 8], out_dim=4,
                   l1_dim=256, l2_dim=128, lr=.0001)

# Initialize memory
memory = ReplayBuffer(inp_dim=[VIEW * VIEW + 8], mem_size=100000,
                      batch_size=64, priority_scale=PRIO)

# Initialize Deep Q Agent
agent = Agent(nn=nn, inp_dim=[VIEW * VIEW + 8], out_dim=4,
              memory=memory, gamma=.99)

# Run training loop
train(game, agent, PATH + FILENAME, loops=100, steps=1000, eps=0,
class CollabSnake(commands.Bot):
    game: SnakeGame
    channel: discord.TextChannel
    last_msg: discord.Message
    tie_detected: bool
    has_ended: bool

    def __init__(self):
        super().__init__('cs!')
        self.game = SnakeGame(board_size)
        self.last_msg = None
        self.tie_detected = False
        self.has_ended = False

    def draw_gamestate(self, draw: ImageDraw.Draw):
        # food (cross)
        food_tile_center = to_canvas_coord(self.game.food_position)
        draw.ink = 4
        draw.point(food_tile_center + (-1, 0))
        draw.point(food_tile_center + (0, -1))
        draw.point(food_tile_center)
        draw.point(food_tile_center + (1, 0))
        draw.point(food_tile_center + (0, 1))

        # snake body
        draw.ink = 3
        draw.fill = 3
        snake_head_tile_center = to_canvas_coord(self.game.snake.head_position)
        tile_center = snake_head_tile_center
        is_first = True
        labs = partial(looparound_vector, board_size)
        tile_position = self.game.snake.head_position
        for move in reversed(self.game.snake.movements):
            start_offset = NULL_VECTOR
            end_offset = NULL_VECTOR
            if move.direction == UP:
                dir_vector = Vector(0, 1)
                start_offset = Vector(0, -1)
            elif move.direction == DOWN:
                dir_vector = Vector(0, -1)
                end_offset = Vector(0, 1)
            elif move.direction == LEFT:
                dir_vector = Vector(1, 0)
                start_offset = Vector(-1, 0)
            elif move.direction == RIGHT:
                dir_vector = Vector(-1, 0)
                end_offset = Vector(1, 0)
            for i in range(move.amount):
                tile_position = labs(tile_position + dir_vector)
                tile_center = to_canvas_coord(tile_position)
                if move.direction == UP and tile_position.y == board_size.h - 1:
                    end_offset += (0, 1)
                elif move.direction == LEFT and tile_position.x == board_size.w - 1:
                    end_offset += (1, 0)
                draw.rectangle([
                    tile_center + (-1, -1) + start_offset,
                    tile_center + (1, 1) + end_offset
                ])
            if is_first:
                # snake head
                end_offset = NULL_VECTOR
                if move.direction == UP and self.game.snake.head_position.y == board_size.h - 1:
                    end_offset += (0, 1)
                elif move.direction == LEFT and self.game.snake.head_position.x == board_size.w - 1:
                    end_offset += (1, 0)
                draw.rectangle([
                    snake_head_tile_center + (-1, -1),
                    snake_head_tile_center + (1, 1) + end_offset
                ])
                is_first = False

        # snake eyes
        last_movement_dir = self.game.snake.movements[-1].direction
        if last_movement_dir == UP or last_movement_dir == DOWN:
            draw.point(snake_head_tile_center + (-1, 0), 2)
            draw.point(snake_head_tile_center + (1, 0), 2)
        else:
            draw.point(snake_head_tile_center + (0, -1), 2)
            draw.point(snake_head_tile_center + (0, 1), 2)

    def draw_gameover(self, draw: ImageDraw.Draw):
        text = 'GAME OVER'
        text_width, text_height = draw.textsize(text)
        draw.text((Vector(*canvas_size) - (text_width, text_height)) / 2,
                  text, fill=(255, 255, 255, 255))

    def create_image(self):
        img = base_img.copy()
        draw = ImageDraw.Draw(img)
        self.draw_gamestate(draw)
        del draw

        if self.game.has_ended:
            img = img.convert('RGBA')
            fg_img = Image.new('RGBA', canvas_size, color=(0, 0, 0, 204))
            fg_draw = ImageDraw.Draw(fg_img)
            self.draw_gameover(fg_draw)
            del fg_draw
            img.alpha_composite(fg_img)

        return img.resize(output_image_size)

    def get_winning_move(self):
        valid_reactions = list(
            filter(lambda r: r.emoji in (UP, DOWN, LEFT, RIGHT),
                   self.last_msg.reactions))
        winner = None
        tie = False
        if len(valid_reactions) > 0:
            best = valid_reactions[0]
            for reaction in valid_reactions[1:]:
                if reaction.count == best.count:
                    tie = True
                elif reaction.count > best.count:
                    best = reaction
                    tie = False
            winner = best.emoji
        return winner if not tie else None

    async def send_new_state(self, img: Image):
        img_bytes = io.BytesIO()
        img.convert('RGBA').save(img_bytes, format='PNG')
        img_bytes.seek(0)

        content = (f'🐍 Did you know? {random.choice(snake_facts)}\n'
                   'React to choose what move comes next 🎮')
        if self.game.has_ended:
            content = (
                'Oh no! We\'ve lost!\n'
                f'Starting new game in {format_time(self.advance_task)}')

        self.last_msg = await self.channel.send(
            content=content,
            file=discord.File(img_bytes, filename='collab_snake.png'))

        if not self.game.has_ended:
            snake_last_move_dir = self.game.snake.movements[-1].direction
            if snake_last_move_dir != DOWN:
                await self.last_msg.add_reaction(UP)
            if snake_last_move_dir != RIGHT:
                await self.last_msg.add_reaction(LEFT)
            if snake_last_move_dir != UP:
                await self.last_msg.add_reaction(DOWN)
            if snake_last_move_dir != LEFT:
                await self.last_msg.add_reaction(RIGHT)

    async def advance(self):
        if self.game.has_ended:
            self.last_msg = None
            self.game = SnakeGame(board_size)
            self.game.has_ended = False
            return

        if self.last_msg:
            # update last_msg with the one in cache
            # (https://github.com/Rapptz/discord.py/issues/861)
            self.last_msg = discord.utils.get(self.cached_messages,
                                              id=self.last_msg.id)

        direction = None
        tie_message_appended = False
        prev_last_message = None
        while direction is None:
            direction = self.get_winning_move()
            if not direction:
                if not tie_message_appended:
                    prev_last_message = self.last_msg.content
                    await self.last_msg.edit(
                        content='**❗ Tie detected**\n' + prev_last_message)
                    tie_message_appended = True

                def check(reaction: discord.Reaction, user: discord.User):
                    return (user.id != self.user.id
                            and reaction.message.id == self.last_msg.id)

                await self.wait_for('reaction_add', check=check)

        if tie_message_appended:
            await self.last_msg.edit(content=prev_last_message)

        self.game.advance(direction)
        img = self.create_image()
        await self.send_new_state(img)

        if self.game.has_ended:
            self.has_ended = True

    @tasks.loop(**send_interval)
    async def advance_task(self):
        try:
            await self.advance()
        except Exception as e:
            # TODO(netux): use after_advance_task()
            from sys import stdout
            from traceback import print_exc
            print(e)
            print_exc(file=stdout)

    @advance_task.after_loop
    async def after_advance_task(self):
        if self.advance_task.failed():
            # TODO(netux): find out why execution stops after this
            print(self.advance_task.exception())

    async def on_ready(self):
        print('Ready to work')
        self.channel = self.get_channel(channel_id)
        self.advance_task.start()
from snake import SnakeGame
import random

sg = SnakeGame(human_player=False)
# sg.run()

while True:
    sg.clock.tick(10)
    print(sg.xs, sg.ys)
    sg.dirs = random.randint(0, 3)  # 0=Up, 1=Left, 2=Down, 3=Right
    sg.next_step()
        if key == Key.left:
            self.action_list.append(self.game.ACTIONS["LEFT"])
        if key == Key.right:
            self.action_list.append(self.game.ACTIONS["RIGHT"])
        if key == Key.esc:
            self.game.done = True

    def __call__(self, game, speed):
        time.sleep(speed)
        action = self.game.ACTIONS["FORWARD"]
        if len(self.action_list) > 0:
            action = self.action_list.pop(0)
        return action


if __name__ == "__main__":
    from snake import SnakeGame
    from gui import MatplotlibGui, TerminalGui, YeetTerminalGui, NoGui

    snake_game = SnakeGame(6, 7)
    # keyboard_listener = KeyboardListener(snake_game)
    player = Player(snake_game, best_engine, YeetTerminalGui(), speed=1)
    player.play_game()
for i in range(16):
    train(1250)
    print(i)

game = SnakeGame(width, height, num_fruit=3)
play_game(game, snake_model)
def play(model: Optional["tf.keras.Model"] = None, **override_game_opts):
    # with python 3.9, this could be SnakeGame(game_options | override_game_opts)
    opts = game_options.copy()
    opts.update(override_game_opts)
    game = SnakeGame(**opts)
    play_game(game, model)
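# Usage sketch; num_fruit is assumed to be one of the accepted game options
# (it appears in the related training snippets):

play()             # defaults from game_options, no model
play(num_fruit=5)  # same, with a single option overridden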
###################################################
# This will perform the necessary steps to start the game, get it ready to
# run, and return any values we will be manipulating, as a list.
def start_game(game):
    # game is passed by reference, so starting it within the function works globally.
    # steps, prev_score, snake (a list), food (x, y), obstacles (list of x, y pairs).
    a, b, c, d, e = game.start()
    # Will vary based on game. These are the things we will use to train the
    # neural net later.
    return [a, b, c, d, e]


while active:
    # Create a Game object and get it ready.
    print("Initializing the game.")
    if MODE == 1:
        game = SnakeGame(gui=True)
    elif MODE == 0:
        game = SnakeGame()
    print("Game initialized.")

    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    sock.bind((SERVER_IP, IN_PORT))  # bind the incoming port
    print("Waiting for Client.")
    while not connected:
        try:
            data, addr = sock.recvfrom(1024)
            received_json = json.loads(data)
            if received_json[0] == 0:  # if client's code is "connecting"
                connected = True
                in_progress = True
                print("Client has connected: " + str(addr))
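# A minimal sketch of the matching client-side handshake. SERVER_IP, IN_PORT
# and the message code 0 ("connecting") come from the server excerpt above;
# everything else is an assumption:

import json
import socket

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.sendto(json.dumps([0]).encode(), (SERVER_IP, IN_PORT))  # 0 = connecting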
from curses import KEY_RIGHT, KEY_LEFT, KEY_UP, KEY_DOWN
from snake import SnakeGame
import random
import numpy as np
import tflearn
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
from statistics import median, mean
from collections import Counter

game = SnakeGame(True)


def randomGames():
    for _ in range(5):
        game.reset(True)
        for _ in range(200):
            win = game.getWindow()
            win.getch()
            action = game.sample()
            # print(action)
            state, reward, alive = game.step(action)
            if not alive:
                break
    game.close()


# randomGames()
game.close()
print(game.getState())

initial_games = 50000
goal_steps = 500
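# The network itself isn't shown in this excerpt. A minimal tflearn sketch
# for the (action + observation) regression setup used by the other snippets
# in this section — the layer sizes are assumptions:

network = input_data(shape=[None, 5, 1], name='input')
network = fully_connected(network, 25, activation='relu')
network = fully_connected(network, 1, activation='linear')
network = regression(network, optimizer='adam', learning_rate=1e-2,
                     loss='mean_square', name='target')
model = tflearn.DNN(network)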
train(100, 100, 0.8)

game = SnakeGame(**opts)
play_game(game, snake_model)
        # Get data
        training_data = self.generate_training_data(initial_games, goal_steps)
        x = torch.tensor([i[0] for i in training_data]).reshape(-1, 5)
        t = torch.tensor([i[1] for i in training_data]).reshape(-1, 1)

        # Define loss and optimizer
        loss_func = nn.MSELoss()
        optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)

        # Train network
        for epoch in range(max_iter):
            # Forward propagation
            y = self.model(x.float())
            loss = loss_func(y, t.float())
            print("epoch: ", epoch, " loss: ", loss.item())

            # Zero the gradients
            optimizer.zero_grad()

            # Backward propagation
            loss.backward()   # perform a backward pass (backpropagation)
            optimizer.step()  # update parameters


if __name__ == "__main__":
    from player import Player
    from gui import MatplotlibGui, TerminalGui

    engine = DNN_Engine(initial_games=500, lr=2e-2, max_iter=500)
    player = Player(SnakeGame(), engine, TerminalGui(), speed=1)
    player.play_game()
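# self.model isn't shown in this excerpt. A minimal sketch compatible with
# the (-1, 5) inputs and (-1, 1) targets above, following the pack_data
# layout sketched earlier — the hidden size is an assumption:

import torch.nn as nn

model = nn.Sequential(
    nn.Linear(5, 25),  # 1 action + 4 observation features in
    nn.ReLU(),
    nn.Linear(25, 1),  # predicted move quality out
)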