def display():
    """Show a game between the Q-table-driven snake and a second snake.

    Opens a pygame window and loops at one frame per second until the
    Q-table snake (``player``) has no lives left or the window is closed.
    Each frame the row of the module-level ``q_table`` for the current
    observation is handed to ``player.action(..., "QL")``; the second snake
    is driven through ``enemy.handle_keys()`` / ``enemy.move()``.

    Prints the player's final score when the loop ends.
    """
    pygame.init()
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT), 0, 32)
    surface = pygame.Surface(screen.get_size())
    surface = surface.convert()
    drawGrid(surface)

    player = Snake(0)
    player.lives = 10
    enemy = Snake(1)
    enemy.positions = [(random.randint(0, SIZE - 1), random.randint(0, SIZE - 1))]
    food = Food([player])
    myfont = pygame.font.SysFont("bahnschrift", 20)

    window_closed = False
    while player.lives > 0 and not window_closed:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                # Without this branch the QUIT event was drained and dropped,
                # so the window could never be closed by the user.
                window_closed = True
            elif event.type == pygame.KEYDOWN and event.key == pygame.K_1:
                print("SPace")
                clock.tick(0)
        if window_closed:
            break

        clock.tick(1)
        drawGrid(surface)
        enemy.handle_keys()

        # The observation is whatever ``Snake`` defines for ``snake - food``;
        # it is used directly as the Q-table key.
        obs = player - food
        # no random action in display phase
        # np.array() already copies its input, so no extra .copy() is needed.
        action_space = np.array(q_table[obs])
        player.action(action_space, "QL")
        enemy.move()

        handleSnakeCollisions(player, enemy)
        handleFoodEating([player, enemy], food)
        # Return value is intentionally unused here, as in the original call.
        getObsGrid([player, enemy], food, size=7, fullGrid=True)

        player.draw(surface)
        enemy.draw(surface)
        food.draw(surface)
        screen.blit(surface, (0, 0))
        text1 = myfont.render("Score AI {0}".format(player.score), 1, (250, 250, 250))
        text2 = myfont.render("Score Player {0}".format(enemy.score), 1, (250, 250, 250))
        screen.blit(text1, (5, 10))
        screen.blit(text2, (SCREEN_WIDTH - 175, 10))
        pygame.display.update()

    if window_closed:
        # Only tear pygame down when the user asked for it; the normal
        # end-of-game path keeps the original behaviour.
        pygame.quit()
    print("Snake Player Final Score:", player.score)
def render(agent, obs_type: str, board_size: int):
    """Show one game played by ``agent`` in a pygame window.

    Args:
        agent: Object exposing ``choose_action(obs)``; the returned action is
            passed to ``player.action([action], "PG")``.
        obs_type: ``"Grid"`` to observe via ``getObsGrid`` (player only,
            ``size=board_size``) or ``"Small"`` to observe via
            ``getObsSmall`` (player and enemy).
        board_size: Grid size forwarded to ``getObsGrid`` when
            ``obs_type == "Grid"``.

    Raises:
        ValueError: If ``obs_type`` is neither ``"Grid"`` nor ``"Small"``.
            Previously this surfaced as an ``UnboundLocalError`` on ``obs``
            after the window had already been opened.

    The loop runs at four frames per second and stops as soon as either
    snake drops below five lives, or when the window is closed. The player's
    final score is printed at the end.
    """
    if obs_type not in ("Grid", "Small"):
        raise ValueError(
            "obs_type must be 'Grid' or 'Small', got {0!r}".format(obs_type)
        )

    pygame.init()
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT), 0, 32)
    surface = pygame.Surface(screen.get_size())
    surface = surface.convert()
    drawGrid(surface)

    player = Snake(0)
    player.lives = 5
    enemy = Snake(1)
    enemy.lives = 5
    enemy.positions = [[5, 5]]
    food = Food([player])
    myfont = pygame.font.SysFont("bahnschrift", 20)

    window_closed = False
    while player.lives > 4 and enemy.lives > 4:
        # Service the event queue every frame so the OS does not flag the
        # window as unresponsive. Only QUIT events are removed; anything
        # else stays queued for code elsewhere that may want it.
        if pygame.event.get(pygame.QUIT):
            window_closed = True
            break

        clock.tick(4)
        drawGrid(surface)
        enemyMovement(food=food, enemy=enemy)

        # no random action in display phase
        if obs_type == "Grid":
            obs = getObsGrid([player], food, size=board_size, fullGrid=False)
        else:
            obs = getObsSmall([player, enemy], food)
        action = agent.choose_action(obs)
        player.action([action], "PG")

        handleSnakeCollisions(player, enemy)
        handleFoodEating([player, enemy], food)

        player.draw(surface)
        enemy.draw(surface)
        food.draw(surface)
        screen.blit(surface, (0, 0))
        text1 = myfont.render("Score AI {0}".format(player.score), 1, (250, 250, 250))
        #text2 = myfont.render("Score Player {0}".format(enemy.score), 1, (250, 250, 250))
        screen.blit(text1, (5, 10))
        #screen.blit(text2, (SCREEN_WIDTH - 175, 10))
        pygame.display.update()

    if window_closed:
        # Tear pygame down only on an explicit close request.
        pygame.quit()
    print("Snake Player Final Score:", player.score)
class MyTestCase(unittest.TestCase): def test_something(self): self.assertEqual(True, False) def setUp(self): print("start test") self.butter = Food(4, 2, 1) def tearDown(self): print("test completed") def test_food(self): self.assertEqual(self.butter.energy_food(), 32) def test_food_type(self): self.assertFalse(self.butter.energy_food() > 10) def test_food_equal(self): self.assertEqual(self.butter.energy_food(), 34.2) def test_energy(self): self.assertFalse(self.butter.energy_food() == 0)
# NOTE(review): the statements down to `return` are the tail of a function
# whose header is outside this excerpt (presumably the `step(...)` called in
# the loop below -- confirm). Their indentation is reconstructed.
    obs = getObsSmall(snakes, food)
    # The episode is flagged as finished when the player's reward equals the
    # negated death penalty.
    if player.reward == -DEATH_PENALTY:
        done = True
    # `info` is always an empty string here.
    info = ""
    return obs, player.reward, done, info


# NOTE(review): neither history list is appended to in the visible part of
# the loop; presumably that happens after the inner `while` -- confirm.
score_history = []
score = 0
n_steps_history = []
for i in tqdm(range(num_episodes)):
    # Fresh snakes and food for every episode; the enemy starts on a random
    # cell with both coordinates in [0, SIZE - 1].
    player = Snake(0)
    enemy = Snake(1)
    enemy.positions = [(random.randint(0, SIZE - 1), random.randint(0, SIZE - 1))]
    food = Food([player])
    done = False
    score = 0
    # returns a numpy array of the state we care about
    # NOTE(review): the initial observation always comes from getObsGrid,
    # while `step` is given `obs_type`; if obs_type selects the small
    # observation the first and later observations may differ in form --
    # confirm.
    observation = getObsGrid(snakes=[player], food=food, size=OBS_GRID_SIZE, fullGrid=False)
    #observation = getObsSmall([player, enemy], food)
    n_steps = 0
    # An episode ends when `step` reports done or after 100 steps.
    while not done and n_steps < 100:
        n_steps += 1
        # action needs to be either 0,1,2 or 3
        action = agentAC.choose_action(observation)
        observation_, reward, done, info = step(snakes=[player, enemy], food=food, action=action, obs_type=obs_type)
        # For Actor-Critic: learn from every single transition.
        agentAC.learn(observation, reward, observation_, done)
        #agent.store_rewards(reward) # For REINFORCE
        observation = observation_
        score += reward
def test_food_type(self):
    """A ``Food(0, 0, 0)`` must report an energy of exactly zero."""
    empty_food = Food(0, 0, 0)
    energy = empty_food.energy_food()
    assert energy == 0
def test_food(self):
    """Check the energy reported for ``Food(4, 2, 2)``.

    The expected value (43.2) is fractional, so it is compared with a
    tolerance: an exact ``==`` on floats can fail purely because of binary
    rounding in the computation.
    """
    import math

    butter = Food(4, 2, 2)
    assert math.isclose(butter.energy_food(), 43.2, rel_tol=1e-9, abs_tol=1e-9)
def test_energy(self):
    """``Food(22, 92, 1)`` is expected to report an energy of 52."""
    expected_energy = 52
    actual_energy = Food(22, 92, 1).energy_food()
    assert actual_energy == expected_energy
def test_food_double(self):
    """Multiplying a ``Food`` by 2 yields an object with non-zero energy."""
    doubled = Food(44, 93, 23) * 2
    doubled_energy = doubled.energy_food()
    assert doubled_energy != 0
def test_food_equal(self):
    """The energy of ``Food(34, 2, 11)`` must differ from 3."""
    unexpected_energy = 3
    actual_energy = Food(34, 2, 11).energy_food()
    assert actual_energy != unexpected_energy
# for x1 in range(-SIZE+1, SIZE): # for y1 in range(-SIZE+1, SIZE): # q_table[(x1, y1)] = [np.random.uniform(-5, 0) for i in range(4)] if start_q_table is not None: print("Loaded q-table") with open(start_q_table, "rb") as f: q_table = pickle.load(f) episode_rewards = [] episode_scores = [] for episode in range(HM_EPISODES): player = Snake(0) enemy = Snake(1) food = Food([player, enemy]) show = False if episode % SHOW_EVERY == 0: print(f"on #{episode}, epsilon is {epsilon}") print( f"{SHOW_EVERY} ep mean: {np.mean(episode_rewards[-SHOW_EVERY:])}" ) print( f"{SHOW_EVERY} ep mean score: {np.mean(episode_scores[-SHOW_EVERY:])}" ) print(episode_scores) else: show = False episode_reward = 0 episode_score = 0
def setUp(self):
    """Announce the test start, then create the shared ``Food(4, 2, 1)`` fixture."""
    print("start test")
    fixture = Food(4, 2, 1)
    self.butter = fixture