def main():
    """Benchmark the MCTS bot against the greedy bot over a parameter grid.

    For every configuration produced by the grid search (MCTS budget and
    exploration weight), ``gamesToPlay`` games are played with the greedy bot
    as player 0 and the wrapped MCTS bot as player 1. Per-game scores and the
    per-move decision time are collected and summarised on stdout.

    Afterwards all results are written as a tab-separated CSV and a box plot
    (PDF) into ``$DEV_OUT_PATH/azul_bot`` if ``DEV_OUT_PATH`` is set, otherwise
    into the current working directory, and the plot is shown.
    """
    gamesToPlay = 30
    maxRoundsPerGame = 100
    samplingWidth = 10
    botClass = MctsBotCpp
    # botClass = MctsBotPy

    searchManager = GridSearchManager(defaultConfig={})
    # searchManager.add_param_axis('mctsBudget', [100, 1000, 10000, 100000, 300000])
    searchManager.add_param_axis('mctsBudget', [100, 1000, 10000])
    # searchManager.add_param_axis('explorationWeight', [20, 30, 50])
    searchManager.add_param_axis('explorationWeight', [20])

    if 'DEV_OUT_PATH' in os.environ:
        outDirPath = Path(os.environ['DEV_OUT_PATH']) / 'azul_bot'
    else:
        outDirPath = Path.cwd()
    outDirPath.mkdir(parents=True, exist_ok=True)

    resultRows = []
    for config, _, _ in searchManager.generate_configuration():
        azul = Azul()
        # players = [build_random_bot(), build_mcts_bot(mctsBudget)]
        players = [
            build_greedy_bot(),
            MctsBotWrapper(config['mctsBudget'],
                           samplingWidth,
                           config['explorationWeight'],
                           botClass=botClass)
        ]

        scores = []
        timePerMove = []
        timer = StageTimer()
        for _ in range(gamesToPlay):
            timer.start_pass()
            state = azul.get_init_state()

            roundCount = 0
            moveCount = 0
            for _ in range(maxRoundsPerGame):
                timer.start_stage('deal')
                state = azul.deal_round(state)

                while not azul.is_round_end(state):
                    timer.start_stage('decide')
                    move = players[state.nextPlayer](state)
                    timer.start_stage('move')
                    state = azul.apply_move_without_scoring(state, move).state
                    # NOTE(review): this counts a move whenever player 1 is the
                    # next to act *after* the move, while the 'decide' stage
                    # times both players' decisions - confirm this is the
                    # intended normalisation for "time per move".
                    moveCount += 1 if state.nextPlayer == 1 else 0

                timer.start_stage('score')
                state = azul.score_round(state)
                roundCount += 1

                if azul.is_game_end(state):
                    state = azul.score_game(state)
                    break

            timer.end_stage()

            if azul.is_game_end(state):
                timer.end_pass()
                dur = timer.get_pass_duration()
                scores.append([state.players[0].score, state.players[1].score])
                timePerMove.append(timer.get_pass_timings()['decide'] / moveCount)
                print(
                    f"Finished a game with scores {state.players[0].score}:{state.players[1].score}"
                    f" in {roundCount} rounds and {dur:.2f} s.")
            else:
                print("Timed out playing a game.")

        timer.end()

        if not scores:
            # Every game hit the round limit. An empty score list would become
            # a 1-D array and the column indexing below would raise IndexError.
            print(timer.get_total_report())
            print("No game finished for this configuration, skipping statistics.")
            continue

        scoresArray = np.array(scores)
        scoresAvg = scoresArray.mean(axis=0)
        winsFirst = np.count_nonzero(scoresArray[:, 0] > scoresArray[:, 1])
        winsSecond = np.count_nonzero(scoresArray[:, 0] < scoresArray[:, 1])
        # Could be a draw.

        # Two rows per finished game: one for each player.
        for greedyScore, mctsScore in scoresArray:
            resultRows.append({
                'player': 'greedy',
                'budget': 0,
                'explorationWeight': 0,
                'score': greedyScore,
                'isWin': greedyScore > mctsScore
            })
            resultRows.append({
                'player': 'mcts',
                'budget': config['mctsBudget'],
                'explorationWeight': config['explorationWeight'],
                'score': mctsScore,
                'isWin': greedyScore < mctsScore
            })

        print(timer.get_total_report())
        print("Average scores: {:.1f} {:.1f}".format(*tuple(scoresAvg)))
        print("Average time per move: {:.1f}".format(
            np.mean(np.array(timePerMove))))
        print("Wins: {} vs {}".format(winsFirst, winsSecond))

    if not resultRows:
        # Without any rows the table has no 'budget'/'score' columns to plot.
        print("No finished games, nothing to plot.")
        return

    print("Plotting.")
    resultTable = pd.DataFrame(resultRows)
    print(resultTable)

    # resultTable = resultTable[resultTable['player'] == 'mcts']
    ax = sns.boxplot(x='budget', y='score', hue='explorationWeight', data=resultTable)

    date = datetime.now()
    dateStr = date.strftime("%y%m%d-%H%M%S")
    # Persist both artefacts before opening the (potentially blocking) plot
    # window, so the metrics are not lost if the process is killed there.
    resultTable.to_csv(outDirPath / f'{dateStr}_metrics.csv', sep='\t')
    ax.get_figure().savefig(outDirPath / f'{dateStr}_scores.pdf')
    plt.show()

    print("Done.")
class AzulCmd(cmd.Cmd):
    """Interactive command loop for playing Azul, with an MCTS bot on demand.

    Commands:
        move <a> <color> <b>  - build a ``Move(int(a), color, int(b))`` and
                                apply it (presumably source bin, color and
                                target queue - confirm against ``Move``).
        bot_move              - let the MCTS bot pick and apply a move for the
                                current state.
        undo                  - restore the state from before the last move.

    The current state is printed before the loop starts and after every
    command.
    """

    prompt = '> '

    def __init__(self):
        # Init with defaults.
        super().__init__()
        self.history = []  # type: List[AzulState]

        self.azul = Azul()
        self.state = self.azul.get_init_state()
        self.state = self.azul.deal_round(self.state)

        # Index of the player that is reported as "Bot" when the game ends.
        self.botPlayerIndex = 0
        # self.budget = 100000
        self.budget = 1000
        self.samplingWidth = 10
        self.explorationWeight = 20

    def preloop(self) -> None:
        """Print the initial state before the first prompt."""
        super().preloop()
        Azul.print_state(self.state)

    def postcmd(self, stop: bool, line: str) -> bool:
        """Print the current state after every command."""
        Azul.print_state(self.state)
        return super().postcmd(stop, line)

    def do_move(self, arg: str):
        """Parse ``<int> <color> <int>`` and apply the resulting move.

        Malformed input (missing tokens, non-numeric numbers, unknown color)
        prints ``# Invalid command`` instead of terminating the command loop.
        """
        # split() without a separator tolerates repeated/leading whitespace.
        bits = arg.split()

        # Parse the command.
        try:
            firstNumber = int(bits[0])
            secondNumber = int(bits[2])
            color = Azul.str_to_color(bits[1])
            move = Move(firstNumber, color, secondNumber)
        except (ValueError, IndexError, KeyError):
            # IndexError: fewer than three tokens were given.
            # KeyError: in case the color lookup is dict-based - TODO confirm
            # what Azul.str_to_color raises for an unknown letter.
            print("# Invalid command")
            return

        self._apply_move(move)

    def do_bot_move(self, arg: str):
        """Run the MCTS bot on the current state and apply the move it picks."""
        # Don't spend the search budget on a finished game.
        if self.azul.is_game_end(self.state):
            print("The game is over, can't do moves.")
            return

        bot = MctsBot(self.azul,
                      self.state,
                      samplingWidth=self.samplingWidth,
                      explorationWeight=self.explorationWeight)
        move = bot.step_n(self.budget)

        print("Bot's move: ")
        print(
            f"Take {Azul.color_to_str(move.color)} from bin {move.sourceBin} to queue {move.targetQueue}"
        )

        self._apply_move(move)

    def _apply_move(self, move):
        """Apply ``move``, scoring/dealing as rounds end; undo it on ValueError."""
        if self.azul.is_game_end(self.state):
            print("The game is over, can't do moves.")
            return

        # Remember the state so that the move can be undone.
        self.history.append(self.state)

        # Apply the move.
        try:
            outcome = self.azul.apply_move_without_scoring(self.state, move)
            self.state = outcome.state

            if self.azul.is_round_end(self.state):
                self.state = self.azul.score_round(self.state)
                if not self.azul.is_game_end(self.state):
                    self.state = self.azul.deal_round(self.state)
                else:
                    self.state = self.azul.score_game(self.state)
                    self._print_game_over()

        except ValueError as e:
            print(f"# {e}")
            print("Undoing the move.")
            self.state = self.history.pop()

    def _print_game_over(self):
        """Announce the result of a finished two-player game."""
        humanIndex = 1 - self.botPlayerIndex
        botScore = self.state.players[self.botPlayerIndex].score
        humanScore = self.state.players[humanIndex].score

        print("=== Game Over! ===")
        # Compare the two scores directly: the winner is whoever has more
        # points, and equal scores are a draw rather than a win for either.
        if botScore == humanScore:
            print("The game is a draw")
        else:
            print(f"{'Bot' if botScore > humanScore else 'Human'} player wins")
        print(f"Scores: Bot = {botScore} "
              f"Human = {humanScore}")

    def do_undo(self, arg: str):
        """Restore the state from before the most recent applied move."""
        if not self.history:
            print("# This is the first turn.")
            return

        print("# UNDO #")
        self.state = self.history.pop()
def test_full_game(self):
    """Replay a full recorded game and verify the scores after every round."""
    game = Azul()
    current = game.get_init_state()

    # Tiles dealt in each of the five rounds, one letter per tile.
    rawTiles = [
        'RWYYRYUURRWWKKYYWWUR',
        'WWUUUURKKKRUYKRUKYYU',
        'URYKUWYYURYYKKURWWYK',
        'RKKWUUWWRRKUUKWWWKRR',
        'KYYRRYWKYYUWWWKRYRKU',
    ]
    dealtTiles = [[Azul.str_to_color(letter) for letter in roundTiles]
                  for roundTiles in rawTiles]

    # Moves alternate between players 0 and 1 within a round.
    # The game keeps track of which player goes first each round, we only specify the very first.
    startingPlayer = 0
    rawMoves = [
        ['3K1', '4W3', '0Y3', '2W3', '5R2', '5Y1', '5W0', '1R0', '5U4', '5Y5'],
        ['4Y3', '2K4', '0U4', '1U2', '3K0', '5Y0', '5R1', '5K4', '5U2', '5W1'],
        ['4K4', '2Y1', '5U2', '1Y2', '0K0', '3R0', '5U1', '5Y2', '5K3', '5W4',
         '5R5'],
        ['3W2', '1W4', '2R4', '5U3', '5K3', '0R5', '5W2', '5K2', '4R1', '5K2',
         '5W0'],
        ['2U1', '4U0', '3W2', '5R4', '5K3', '5Y2', '5W2', '1R4', '5Y0', '5K1',
         '0K3', '5Y3', '5R4', '5W5'],
    ]
    recordedMoves = [[Move.from_str(text) for text in roundMoves]
                     for roundMoves in rawMoves]

    expectedRoundScores = [[4, 3], [19, 5], [33, 16], [45, 37], [54, 55]]
    expectedFinalScores = [70, 71]

    # Simulate the game and check the score each round.
    current.nextPlayer = startingPlayer
    rounds = zip(dealtTiles, recordedMoves, expectedRoundScores)
    for roundTiles, roundMoves, expectedScores in rounds:
        current = game.deal_round(current, roundTiles)

        for recorded in roundMoves:
            outcome = game.apply_move_without_scoring(current, recorded)
            current = outcome.state

        # The recorded moves must exhaust the round exactly.
        self.assertTrue(game.is_round_end(current))

        current = game.score_round(current)
        actualScores = [player.score for player in current.players]
        self.assertEqual(actualScores, expectedScores)

    # End-of-game scoring is applied once, after the last round.
    current = game.score_game(current)
    actualFinal = [player.score for player in current.players]
    self.assertEqual(actualFinal, expectedFinalScores)