def play_game(players, num_samples=100, num_simulations=None,
              total_simulation_seconds=1):
    """Play one complete game and return its history together with the reward.

    Tiles are dealt, a starting player is drawn at random from 0-3, and
    ``play_with_algo`` is invoked for whoever is the current player until the
    state reports that it is terminal.

    Args:
        players: Indexable by player number; each entry is handed to
            ``play_with_algo`` as the algorithm for that seat.
        num_samples: Forwarded positionally to ``play_with_algo``.
        num_simulations: Forwarded to ``play_with_algo``.
        total_simulation_seconds: Forwarded to ``play_with_algo``.

    Returns:
        A tuple ``(history, reward)`` where ``history`` is the list of every
        state visited (initial state included) and ``reward`` is the final
        state's ``calc_reward()``.
    """
    # Deal before drawing the starting player, keeping the call order stable.
    dealt_hands = deal_tiles()
    first_player = random.choice([0, 1, 2, 3])
    opening_setup = {
        'tiles_by_player': dealt_hands,
        'suits_at_ends': set(),
    }
    state = DominoState(first_player, opening_setup)
    history = [state]

    log(f"Starts player {first_player}")
    log("Tiles : ")
    log(pformat(state._tiles_by_player))

    search_budget = {
        'num_simulations': num_simulations,
        'total_simulation_seconds': total_simulation_seconds,
    }
    while not state.is_terminal():
        turn = state._current_player
        log("=======================================")
        log(pformat(state._tiles_by_player[turn]))
        state = play_with_algo(
            players[turn], state, history, num_samples, **search_budget)
        history.append(state)
        print_state(state)

    log(f"winneeeer {state.calc_reward()}")
    log(pformat(state._tiles_by_player))
    record_winner(state._tiles_by_player)
    return (history, state.calc_reward())
def test_is_terminal_game_closed(self):
    """A state whose only open end (2) appears in no hand must be terminal."""
    hands = [
        [{5, 4}],
        [{3, 5}],
        [{6}],
        [{1, 0}],
    ]
    blocked = DominoState(1, {
        'tiles_by_player': hands,
        'suits_at_ends': {2},
    })
    self.assertTrue(blocked.is_terminal())
def test_is_terminal_team1_win(self):
    """A state in which one player holds no tiles must be terminal.

    NOTE(review): the empty hand here belongs to player index 3; confirm
    that this matches the "team1" in the test name.
    """
    hands = [
        [{5, 4}],
        [{3, 5}],
        [{6}],
        [],
    ]
    finished = DominoState(0, {
        'tiles_by_player': hands,
        'suits_at_ends': {2},
    })
    self.assertTrue(finished.is_terminal())
def test_calc_reward_team2_win_by_points(self):
    """``calc_reward`` is -1 for this position where no hand contains a 2."""
    hands = [
        [{5, 4}],
        [{3, 5}],
        [{6}],
        [{5}],
    ]
    blocked = DominoState(1, {
        'tiles_by_player': hands,
        'suits_at_ends': {2},
    })
    reward = blocked.calc_reward()
    self.assertEqual(reward, -1)
def test_calc_reward_team1_win(self):
    """``calc_reward`` is 1 when player 0 has emptied their hand."""
    hands = [
        [],
        [{3, 5}],
        [{4}],
        [{5}],
    ]
    finished = DominoState(1, {
        'tiles_by_player': hands,
        'suits_at_ends': {4, 3},
    })
    reward = finished.calc_reward()
    self.assertEqual(reward, 1)
def test_current_team_is_team1(self):
    """``current_team`` returns 1 when the current player is index 2."""
    hands = [
        [{0}, {1, 2}, {1, 5}, {5, 6}, {4, 6}, {6}],
        [{5}, {2, 5}, {0, 5}, {1, 3}, {2, 4}, {2}, {3, 5}],
        [{4}, {3}, {4, 5}, {0, 1}, {0, 6}, {1, 6}, {0, 2}],
        [{3, 4}, {0, 3}, {2, 6}, {1, 4}, {0, 4}, {3, 6}, {1}],
    ]
    position = DominoState(2, {
        'tiles_by_player': hands,
        'suits_at_ends': {2, 3},
    })
    self.assertEqual(position.current_team(), 1)
def test_game_is_closed(self):
    """``_game_is_closed`` is true when no hand contains the open end (2)."""
    hands = [
        [{4, 6}, {5, 3}],
        [{3, 5}],
        [{4}],
        [{4, 1}],
    ]
    blocked = DominoState(1, {
        'tiles_by_player': hands,
        'suits_at_ends': {2},
    })
    self.assertTrue(blocked._game_is_closed())
def test_teams_have_same_amount_of_points(self):
    """``_team_with_fewer_points`` returns 0 for this position.

    Per the test name, the hands are chosen so both teams hold equal points.
    """
    hands = [
        [{4, 6}, {5, 1}],
        [{3, 1}, {5}],
        [{4}],
        [{4, 3}, {3, 0}],
    ]
    tied = DominoState(1, {
        'tiles_by_player': hands,
        'suits_at_ends': {2},
    })
    self.assertEqual(tied._team_with_fewer_points(), 0)
def test_team2_has_fewer_points(self):
    """``_team_with_fewer_points`` returns the state's ``team_2`` marker."""
    hands = [
        [{4, 6}, {5, 1}],
        [{3, 1}],
        [{4}],
        [{4, 3}],
    ]
    position = DominoState(1, {
        'tiles_by_player': hands,
        'suits_at_ends': {2},
    })
    lighter_team = position._team_with_fewer_points()
    self.assertEqual(lighter_team, position.team_2)
def test_game_is_not_closed_if_no_suits_at_ends(self):
    """With an empty set of open ends the game must not count as closed."""
    hands = [
        [{4, 6}, {5, 3}],
        [{3, 5}],
        [{4}],
        [{4, 1}],
    ]
    fresh = DominoState(1, {
        'tiles_by_player': hands,
        'suits_at_ends': set(),
    })
    self.assertFalse(fresh._game_is_closed())
def test_pass_action_is_legal(self):
    """The pass action is legal for player 1, whose hand contains no 6.

    The pass is expressed as ``DominoAction(1, {-1}, None)`` and the only
    open end is 6.
    """
    hands = [
        [{0}, {1, 2}, {1, 5}, {5, 6}, {4, 6}, {2, 3}],
        [{5}, {2, 5}, {0, 5}, {1, 3}, {2, 4}, {2}, {3, 5}],
        [{4}, {3}, {4, 5}, {0, 1}, {0, 6}, {1, 6}, {0, 2}],
        [{3, 4}, {0, 3}, {2, 6}, {1, 4}, {0, 4}, {3, 6}, {1}],
    ]
    position = DominoState(1, {
        'tiles_by_player': hands,
        'suits_at_ends': {6},
    })
    pass_move = DominoAction(1, {-1}, None)
    self.assertTrue(position._is_action_legal(pass_move))
def test_action_is_ilegal_not_current_player(self):
    """An action by player 0 is illegal while player 1 is the current player."""
    hands = [
        [{0}, {1, 2}, {1, 5}, {5, 6}, {4, 6}, {6}],
        [{5}, {2, 5}, {0, 5}, {1, 3}, {2, 4}, {2}, {3, 5}],
        [{4}, {3}, {4, 5}, {0, 1}, {0, 6}, {1, 6}, {0, 2}],
        [{3, 4}, {0, 3}, {2, 6}, {1, 4}, {0, 4}, {3, 6}, {1}],
    ]
    position = DominoState(1, {
        'tiles_by_player': hands,
        'suits_at_ends': {2, 3},
    })
    out_of_turn = DominoAction(0, {1, 2}, 2)
    self.assertFalse(position._is_action_legal(out_of_turn))
def test_get_possible_moves_when_different_playable_suits(self):
    """Every action offered to player 0 is one of the expected triples.

    With open ends 5 and 3, the tile {3, 5} is expected once per end. Only
    membership is checked: the test does not verify that all expected
    actions are actually produced.
    """
    hands = [
        [{0}, {1, 2}, {1, 5}, {3, 5}, {5, 6}, {4, 6}],
        [{5}, {0, 3}, {0, 5}, {1, 3}, {2, 4}, {6}],
        [{4}, {4, 5}, {0, 1}, {0, 6}, {1, 6}, {0, 2}],
        [{3, 4}, {2, 6}, {1, 4}, {0, 4}, {3, 6}, {1}],
    ]
    position = DominoState(0, {
        'tiles_by_player': hands,
        'suits_at_ends': {5, 3},
    })
    expected_actions = [
        (0, {1, 5}, 5),
        (0, {6, 5}, 5),
        (0, {3, 5}, 3),
        (0, {3, 5}, 5),
    ]
    for move in position.get_possible_actions():
        observed = (move.player, move.tile, move.suit_played)
        self.assertIn(observed, expected_actions)
def test_get_possible_moves(self):
    """Each action offered belongs to player 1 and uses an allowed tile.

    The allowed tiles are player 1's tiles containing a 2 or a 3, the two
    open ends. Only membership is checked, not completeness.
    """
    hands = [
        [{0}, {1, 2}, {1, 5}, {6}, {5, 6}, {4, 6}],
        [{5}, {2, 5}, {0, 5}, {1, 3}, {2, 4}, {2}, {3, 5}],
        [{4}, {3}, {4, 5}, {0, 1}, {0, 6}, {1, 6}, {0, 2}],
        [{3, 4}, {0, 3}, {2, 6}, {1, 4}, {0, 4}, {3, 6}, {1}],
    ]
    position = DominoState(1, {
        'tiles_by_player': hands,
        'suits_at_ends': {2, 3},
    })
    playable_tiles = [{2, 5}, {1, 3}, {2, 4}, {2}, {3, 5}]
    for move in position.get_possible_actions():
        self.assertIn(move.tile, playable_tiles)
        self.assertEqual(move.player, 1)
def test_player_pass_if_no_tiles_to_play(self):
    """The MCTS decision is a pass when the current player cannot play.

    Player 0 holds only the tile {2} while the open ends are 3 and 6, so no
    tile in hand matches. The decision is checked against the pass tile
    ``{-1}``, the same representation the pass-legality test in this file
    uses. Previously the result was only printed, so the test could never
    fail.
    """
    state = DominoState(
        0, {
            'tiles_by_player': [[{2}], [{0, 2}, {2, 3}, {4}], [{0, 6}], [{0}]],
            'suits_at_ends': {3, 6}
        })
    decision = mcts_decision(state)
    self.assertEqual(decision.action.tile, {-1})
def test_next_state_from_action_creates_deep_copy(self):
    """Mutating the successor state's hands must not touch the original state.

    The original state is built from a deep copy of the reference hands, so
    it can be compared against the untouched reference afterwards.
    """
    tiles_by_player = [
        [{0}, {1, 2}, {1, 5}, {6}, {5, 6}, {4, 6}],
        [{5}, {2, 5}, {0, 5}, {1, 3}, {2, 4}, {2}, {3, 5}],
        [{4}, {3}, {4, 5}, {0, 1}, {0, 6}, {1, 6}, {0, 2}],
        [{3, 4}, {0, 3}, {2, 6}, {1, 4}, {0, 4}, {3, 6}, {1}],
    ]
    original = DominoState(1, {
        'tiles_by_player': copy.deepcopy(tiles_by_player),
        'suits_at_ends': {2, 3},
    })

    played = DominoAction(1, {2, 5}, 2)
    successor = original.next_state_from_action(played)
    # Tamper with the successor only; the original must stay as dealt.
    successor._tiles_by_player[0].remove({0})

    self.assertEqual(tiles_by_player, original._tiles_by_player)
    self.assertIn({0}, original._tiles_by_player[0])
    self.assertNotIn({0}, successor._tiles_by_player[0])
    self.assertIn({2, 5}, original._tiles_by_player[1])
    self.assertNotIn({2, 5}, successor._tiles_by_player[1])
def test_next_state_from_action(self):
    """Playing {2, 5} on the 2 end advances the turn and updates the state.

    After player 1 plays, the current player is 2, the open ends become
    {5, 3}, and the tile {2, 5} is gone from player 1's hand while every
    other hand is unchanged.
    """
    hands_before = [
        [{0}, {1, 2}, {1, 5}, {6}, {5, 6}, {4, 6}],
        [{5}, {2, 5}, {0, 5}, {1, 3}, {2, 4}, {2}, {3, 5}],
        [{4}, {3}, {4, 5}, {0, 1}, {0, 6}, {1, 6}, {0, 2}],
        [{3, 4}, {0, 3}, {2, 6}, {1, 4}, {0, 4}, {3, 6}, {1}],
    ]
    hands_after = [
        [{0}, {1, 2}, {1, 5}, {6}, {5, 6}, {4, 6}],
        [{5}, {0, 5}, {1, 3}, {2, 4}, {2}, {3, 5}],
        [{4}, {3}, {4, 5}, {0, 1}, {0, 6}, {1, 6}, {0, 2}],
        [{3, 4}, {0, 3}, {2, 6}, {1, 4}, {0, 4}, {3, 6}, {1}],
    ]
    position = DominoState(1, {
        'tiles_by_player': hands_before,
        'suits_at_ends': {2, 3},
    })

    successor = position.next_state_from_action(DominoAction(1, {2, 5}, 2))

    self.assertEqual(successor._current_player, 2)
    self.assertEqual(successor._suits_at_ends, {5, 3})
    self.assertEqual(successor._tiles_by_player, hands_after)
class GameOfDomino(TwoPlayersGame):
    """Wrap a ``DominoState`` behind the ``TwoPlayersGame`` method set.

    Every method delegates to the wrapped state held in ``self._state``.
    """

    def __init__(self, players):
        """Store the players and start from a default ``DominoState``.

        NOTE(review): ``DominoState`` is built without arguments here, while
        other code in this file passes a player index and a setup dict;
        confirm the constructor provides defaults.
        """
        self.players = players
        # Side number 1 moves first.
        self.nplayer = 1
        self._state = DominoState()

    def possible_moves(self):
        """Return the actions the wrapped state reports as possible."""
        return self._state.get_possible_actions()

    def make_move(self, move):
        """Replace the wrapped state with the one reached by ``move``."""
        self._state = self._state.next_state_from_action(move)

    def win(self):
        """Return True when the reward identifies the side in ``nplayer``.

        A reward of 0 is never a win. Otherwise side 1 wins when the reward
        equals the state's ``team_1`` and side 2 wins when it equals
        ``team_2``.
        """
        outcome = self._state.calc_reward()
        team_1 = self._state.team_1
        team_2 = self._state.team_2
        if outcome == 0:
            return False
        return ((outcome == team_1 and self.nplayer == 1)
                or (outcome == team_2 and self.nplayer == 2))

    def is_over(self):
        """Return whether the wrapped state is terminal."""
        return self._state.is_terminal()

    def show(self):
        """Print the wrapped state."""
        print(self._state)

    def scoring(self):
        """Return 100 if ``win()`` is true, otherwise 0."""
        if self.win():
            return 100
        return 0
def play_mcts(state, num_simulations=None, total_simulation_seconds=1):
    """Choose a move with Monte Carlo tree search and return the next state.

    The search runs on an auxiliary state whose current player is 0 and whose
    hands are ``rotate(tiles, current_player)`` (presumably this puts the
    real current player's hand at index 0; confirm against ``rotate``). The
    tile and suit of the best action found are then replayed on the original
    state under the real current player's index.

    Args:
        state: The state to move from.
        num_simulations: Passed to the search as ``simulations_number``.
        total_simulation_seconds: Passed to the search unchanged.

    Returns:
        The state produced by ``state.next_state_from_action``.
    """
    mover = state._current_player
    rotated_hands = rotate(state._tiles_by_player, mover)
    search_state = DominoState(0, {
        'tiles_by_player': rotated_hands,
        'suits_at_ends': state._suits_at_ends,
    })

    root = TwoPlayersGameMonteCarloTreeSearchNode(
        state=DominoGameState(search_state))
    searcher = MonteCarloTreeSearch(root)
    best_child = searcher.best_action(
        simulations_number=num_simulations,
        total_simulation_seconds=total_simulation_seconds)
    chosen = best_child.state._state.action

    # Re-label the chosen move with the real player index before applying it.
    move = DominoAction(mover, chosen.tile, chosen.suit_played)
    return state.next_state_from_action(move)
def pimc_decision(suits_at_ends, my_tiles, played_tiles, num_tiles_by_player,
                  sample_size=100, mcts_simulations=None,
                  total_simulation_seconds=1):
    """Pick a move by majority vote over sampled deals of the other hands.

    ``sample_hands_uniformly`` supplies three parallel collections of hands.
    For each sample index a ``DominoState`` is built with the caller's hand
    at index 0 and ``mcts_decision`` is asked for a move; the (tile, suit)
    pair chosen most often is returned.

    Args:
        suits_at_ends: Open ends, passed unchanged into every sampled state.
        my_tiles: The caller's tiles; each is converted to a ``set``.
        played_tiles: Forwarded to ``sample_hands_uniformly``.
        num_tiles_by_player: Forwarded to ``sample_hands_uniformly``.
        sample_size: Number of sampled deals to evaluate.
        mcts_simulations: Passed to ``mcts_decision`` as ``num_simulations``.
        total_simulation_seconds: Passed to ``mcts_decision`` unchanged.

    Returns:
        A tuple ``(tile, suit)`` where ``tile`` is a ``set``.
    """
    first_hands, second_hands, third_hands = sample_hands_uniformly(
        played_tiles, my_tiles, num_tiles_by_player, sample_size)

    # Convert every sampled tile to a set, in place.
    for sampled in (first_hands, second_hands, third_hands):
        for idx in range(sample_size):
            sampled[idx] = [set(tile) for tile in sampled[idx]]

    own_hand = [set(tile) for tile in my_tiles]

    decisions = []
    for idx in range(sample_size):
        world = DominoState(0, {
            'tiles_by_player': [
                list(own_hand),
                list(first_hands[idx]),
                list(second_hands[idx]),
                list(third_hands[idx]),
            ],
            'suits_at_ends': suits_at_ends,
        })
        decisions.append(
            mcts_decision(
                world,
                num_simulations=mcts_simulations,
                total_simulation_seconds=total_simulation_seconds))

    # Tiles are sets, so freeze them to make the votes hashable.
    tally = collections.Counter(
        (frozenset(decision.action.tile), decision.action.suit_played)
        for decision in decisions)
    (tile, suit), _votes = tally.most_common(1)[0]
    return (set(tile), suit)
def __init__(self, players):
    """Store the players, mark side 1 as first to move, create the state.

    The state is a ``DominoState`` built without arguments.
    """
    # Side number 1 moves first.
    self.players, self.nplayer = players, 1
    self._state = DominoState()