def test_snake_move():
    """Moving keeps the heading when no turn is given and honours a new one.

    Grid origin is top-left, so moving UP decreases ``y``.
    """
    snake = Snake(3, 3, Position(1, 1), Direction.RIGHT)

    # No direction supplied: the snake keeps heading RIGHT and x advances.
    snake.move(None)
    head = snake.head.current_position
    assert head.x == 2
    assert head.y == 1
    assert snake.direction == Direction.RIGHT

    # Explicit turn: the head moves up (y decreases) and direction updates.
    snake.move(Direction.UP)
    head = snake.head.current_position
    assert head.x == 2
    assert head.y == 0
    assert snake.direction == Direction.UP
def initial_population(self):
    """Play ``self.init_games`` exploratory games and collect labelled samples.

    Each sample is ``[observation_with_action, reward]`` where the reward is
    -1 when the move ended the game, 1 when it increased the score or moved
    the snake closer to the food, and 0 otherwise.

    Returns:
        list: accumulated ``[features, reward]`` training pairs.
    """
    training_data = []
    for i in range(self.init_games):
        print("Training Game:", i + 1)
        game = Snake()
        _, prev_score, snake, food = game.start()
        prev_observation = self.generate_observation(snake, food)
        prev_food_distance = self.get_food_distance(snake, food)
        for _ in range(self.goal_steps):
            action, game_action = self.generate_action(snake)
            done, score, snake, food = game.move(game_action)
            if done:
                # Terminal move: label it with a strongly negative reward.
                training_data.append([
                    self.add_action_to_observation(prev_observation, action),
                    -1,
                ])
                break
            food_distance = self.get_food_distance(snake, food)
            # Reward moves that scored or reduced the distance to the food.
            reward = 1 if (score > prev_score
                           or food_distance < prev_food_distance) else 0
            training_data.append([
                self.add_action_to_observation(prev_observation, action),
                reward,
            ])
            # BUG FIX: the original never refreshed prev_score, so once the
            # snake ate a single piece of food every later step compared
            # against the stale starting score and was labelled 1
            # unconditionally, corrupting the training labels.
            prev_score = score
            prev_observation = self.generate_observation(snake, food)
            prev_food_distance = food_distance
    return training_data
def run_game(agent, game_number):
    """Run one game of snake driven by the DQN `agent` and log every step.

    Builds a fresh apple and a three-cell snake, then loops until the snake
    hits itself, hits the window edge, or makes 100 steps without a positive
    reward. Each step: capture the old state, pick a turn (epsilon-greedy),
    move, compute the reward, optionally train the agent, and append the
    (action, state) pair to ``STATE[f"Game #{game_number}"]``.

    NOTE(review): relies on module-level globals (WINDOW_WIDTH, WINDOW_HEIGHT,
    WIDTH, HEIGHT, DISPLAY, SCORE, STATE, SNAKE_MOVE, params, FPS_CLOCK, FPS,
    DEVICE, CURRENT_TIME) defined elsewhere in the file — not visible here.
    Indentation below was reconstructed from a whitespace-mangled source;
    confirm the event loop is intended to run once per game, not per step.
    """
    # Create an apple at roughly (WIDTH/3, HEIGHT/2) on the pixel grid.
    apple = Apple(
        Cell(WINDOW_WIDTH / WIDTH * round(WIDTH / 3),
             WINDOW_HEIGHT / HEIGHT * round(HEIGHT / 2), DISPLAY))
    # Create the snake: three cells in the same row (HEIGHT/4),
    # columns 4, 3, 2 — head first.
    snake = Snake(
        Cell(WINDOW_WIDTH / WIDTH * 4,
             WINDOW_HEIGHT / HEIGHT * round(HEIGHT / 4), DISPLAY),
        Cell(WINDOW_WIDTH / WIDTH * 3,
             WINDOW_HEIGHT / HEIGHT * round(HEIGHT / 4), DISPLAY),
        Cell(WINDOW_WIDTH / WIDTH * 2,
             WINDOW_HEIGHT / HEIGHT * round(HEIGHT / 4), DISPLAY))
    # Reset player score.
    SCORE.player_score = 0
    # Initialize list containing tuples: (action, current state).
    action_state_list = []
    action_counter = 0
    # Drain pending pygame events: quit, ESC, or a direction key press.
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            terminate()
        if event.type == pygame.KEYDOWN and event.key == pygame.K_ESCAPE:
            # ESC key pressed: hard exit.
            pygame.quit()
            sys.exit()
        if event.type == pygame.KEYDOWN:
            # Other key pressed: change the snake's direction if applicable.
            snake.direction = get_direction(event.key, snake.direction)
    # If any direction key was pressed - assign corresponding action.
    action = snake.direction
    steps = 0  # steps since the last positive reward
    while (not any(
            (snake_hit_self(snake), snake_hit_edge(snake)))) and (steps < 100):
        # Before snake does its first move, assign action = 'none'.
        action_counter += 1
        action = 'none'
        # Previous snake direction. We'll use it as one of the current
        # state parameters for evaluation.
        snake_prev_direction = snake.direction
        if not params['train']:
            # Evaluation mode: near-greedy policy.
            agent.epsilon = 0.01
        else:
            # agent.epsilon is set to give randomness to actions; it decays
            # linearly with the game number.
            agent.epsilon = 1 - (game_number * params['epsilon_decay_linear'])
        # Get previous environment state.
        state_old = get_state(
            get_state_in_json(player_score=SCORE.player_score,
                              high_score=SCORE.high_score,
                              head_pos=get_snake_new_head(snake),
                              snake_pos=snake.body,
                              apple_pos=apple.apple,
                              prev_direction=snake_prev_direction))
        # Manhattan components of the head-to-apple distance before moving.
        head_apple_distance_old_x, head_apple_distance_old_y = \
            abs(get_snake_new_head(snake)[0] - apple.apple.x), \
            abs(get_snake_new_head(snake)[1] - apple.apple.y)
        # Perform random actions based on agent.epsilon, or choose the action.
        if random.uniform(0, 1) < agent.epsilon:
            # Explore: one-hot over the 3 relative moves (straight/left/right
            # presumably — confirm against SNAKE_MOVE).
            snake.turn(matrix=np.eye(3)[random.randint(0, 2)],
                       prev_direction=snake_prev_direction,
                       move_list=SNAKE_MOVE)
        else:
            # Exploit: predict action based on the old state
            # (no_grad: inference only, no autograd bookkeeping).
            with torch.no_grad():
                state_old_tensor = torch.tensor(state_old.reshape((1, 12)),
                                                dtype=torch.float32).to(DEVICE)
                prediction = agent(state_old_tensor)
                snake.turn(matrix=np.eye(3)[np.argmax(
                    prediction.detach().cpu().numpy()[0])],
                           prev_direction=snake_prev_direction,
                           move_list=SNAKE_MOVE)
        # Move the snake one cell in its current direction.
        snake.move()
        # Handle snake/apple collision: grow the snake, respawn the apple,
        # and bump the score.
        if snake_hit_apple(snake, apple):
            snake.grow()
            apple.spawn([(block.x, block.y) for block in snake.body
                         ])  # check apple does not spawn on snake.
            SCORE.score()
        # Calculate new environment state after snake moved.
        state_new = get_state(
            get_state_in_json(player_score=SCORE.player_score,
                              high_score=SCORE.high_score,
                              head_pos=get_snake_new_head(snake),
                              snake_pos=snake.body,
                              apple_pos=apple.apple,
                              prev_direction=snake_prev_direction))
        head_apple_distance_new_x, head_apple_distance_new_y = \
            abs(get_snake_new_head(snake)[0] - apple.apple.x), \
            abs(get_snake_new_head(snake)[1] - apple.apple.y)
        # Set reward for the new state.
        reward = agent.set_reward(snake, apple, head_apple_distance_new_x,
                                  head_apple_distance_old_x,
                                  head_apple_distance_new_y,
                                  head_apple_distance_old_y)
        # If snake hit apple or moved towards it, reset steps counter to 0.
        if reward > 0:
            steps = 0
        if params['train']:
            # Train short memory based on the new action and state.
            agent.train_short_memory(
                state_old, snake.turn_direction, reward, state_new,
                any((snake_hit_self(snake), snake_hit_edge(snake))))
            # Store the new data into a long term memory.
            agent.remember(state_old, snake.turn_direction, reward, state_new,
                           any((snake_hit_self(snake), snake_hit_edge(snake))))
        # Pass the apple and the snake to the frame-drawing function.
        if params['display']:
            draw_frame(snake, apple, SCORE)
            FPS_CLOCK.tick(FPS)
        steps += 1
        # Appending the current action (could be 'none') and the current
        # state of the snake to the list - "Action-State List".
        action_state_list.append(
            ({
                f"Action {action_counter}": action
            },
             get_state_in_json(player_score=SCORE.player_score,
                               high_score=SCORE.high_score,
                               head_pos=get_snake_new_head(snake),
                               snake_pos=snake.body,
                               apple_pos=apple.apple,
                               prev_direction=snake_prev_direction)))
    # "Action-State List" to current game and write json on disk.
    STATE[f"Game #{game_number}"] = action_state_list
    # If the snake reached the window edge, the game is over
    # (checked via snake_hit_edge); persist the recorded state.
    if snake_hit_edge(snake):
        write_state_to_file(STATE, CURRENT_TIME)
    # If the snake hit its own tail, the game is over
    # (checked via snake_hit_self); persist the recorded state.
    if snake_hit_self(snake):
        write_state_to_file(STATE, CURRENT_TIME)
snake, food = population_list[i] steps = 0 while True: time_delta = 0 screen.fill((255,255,255)) manager.update(time_delta) slider.update(time_delta) manager.draw_ui(screen) for event in pygame.event.get(): if event.type == pygame.QUIT: sys.exit() manager.process_events(event) snake.move() Xdata = get_Xdata(snake, food, steps) if Xdata is None: snake.kill() if not snake.is_alive: population_list.pop(i, None) scores[i] = [snake.length-2, steps] break Ydata = nn_list[i](Xdata) out = numpy.where(Ydata == numpy.max(Ydata))[0][0] if out == 0 and snake.direction != 'DOWN': snake.turn('UP') elif out == 1 and snake.direction != 'LEFT': snake.turn('RIGHT')