def train(num_trials=40): score1 = 0 score2 = 0 player1 = 0 player2 = 0 for trial in range(num_trials): game = SnakeGame(board_size=(20, 25)) state = game.start_state() game.print_board(state) while True: action = minimax_agent_first_index(game, state) state = game.successor(state, action, True) if game.is_end(state)[0] == True: reward = game.is_end(state)[2] - state[3][1] incorporateFeedback(game, state, action, reward, succ) break game.print_board(state) current_dir = state[2][1] actions = get_valid(current_dir, game.actions()) action = get_QL_Action(game, state, actions) succ = game.successor(state, action) snake = succ[1][1] food = state[4] reward = succ[3][1] - state[3][1] #reward = 100*(succ[3][1]- state[3][1]) -((snake[0][0] - food[0])**2 + (snake[0][1] - food[1])**2) result = game.is_end(succ) state[0].addstr(28, 10, ' Reward: ' + str(reward) + ' ') state[0].addstr(29, 10, ' ScoreNow: ' + str(succ[3][1]) + ' ') state[0].addstr(30, 10, ' ScorePrev: ' + str(state[3][1]) + ' ') incorporateFeedback(game, state, action, reward, succ) game.print_board(state) state = succ if game.is_end(state)[0] == True: break global explorationProb explorationProb = explorationProb / 2 curses.endwin() '''
def play_snake_game(agent_one, agent_two, num_games=100, board_size=(20, 40)):
    """Play a series of two-agent snake games and print the win totals.

    The agents alternate turns, ``agent_one`` first. Each agent is called as
    ``agent(game, state)`` and its return value is passed to
    ``game.successor(state, action, True)``. A game stops as soon as
    ``game.is_end(state)[0]`` is truthy; the second element of that result
    selects the outcome: ``0`` is a tie, ``1`` counts as a win for player 2,
    anything else as a win for player 1.

    Args:
        agent_one: Callable choosing the move on the first, third, ... turn.
        agent_two: Callable choosing the move on the second, fourth, ... turn.
        num_games: Number of games to play (default 100).
        board_size: Board dimensions forwarded to ``SnakeGame`` (default
            ``(20, 40)``).

    Side effects:
        Draws each position with ``game.print_board``, prints "Tie game" for
        every tied game and prints both win totals once all games are done.
    """
    agents = (agent_one, agent_two)
    player1 = 0
    player2 = 0

    for _ in range(num_games):
        game = SnakeGame(board_size=board_size)
        state = game.start_state()
        try:
            game.print_board(state)
            turn = 0
            while True:
                action = agents[turn](game, state)
                state = game.successor(state, action, True)
                result = game.is_end(state)
                if result[0]:
                    break
                game.print_board(state)
                turn = 1 - turn  # hand the move to the other agent
        finally:
            # Leave curses mode before printing, and also when an agent
            # raises, so the terminal is never left in a broken state.
            curses.endwin()

        if result[1] == 0:
            print("Tie game")
        elif result[1] == 1:
            player2 += 1
        else:
            player1 += 1
        # A per-game score printout used to live here (disabled in the
        # original): it printed result[2] and result[3] as the two agents'
        # scores, swapping them depending on whether state[5] == 1.

    print("Player 1 wins: " + str(player1))
    print("Player 2 wins: " + str(player2))