def test_play_monopoly_no_monopoly_card():
    """Executing PLAY_MONOPOLY on a freshly created game raises ``ValueError``."""
    red, blue = SimplePlayer(Color.RED), SimplePlayer(Color.BLUE)
    game = Game([red, blue])
    monopoly_action = Action(red.color, ActionType.PLAY_MONOPOLY, ORE)

    # The acting player holds no monopoly card at this point.
    with pytest.raises(ValueError):
        game.execute(monopoly_action)
def test_play_year_of_plenty_no_year_of_plenty_card():
    """Executing PLAY_YEAR_OF_PLENTY on a freshly created game raises ``ValueError``."""
    red, blue = SimplePlayer(Color.RED), SimplePlayer(Color.BLUE)
    game = Game([red, blue])
    year_of_plenty_action = Action(
        red.color,
        ActionType.PLAY_YEAR_OF_PLENTY,
        [ORE, WHEAT],
    )

    # The acting player holds no year-of-plenty card at this point.
    with pytest.raises(ValueError):
        game.execute(year_of_plenty_action)
def test_vps_to_win_config():
    """With ``vps_to_win=4`` the winner finishes with 4 or 5 actual victory points."""
    game = Game([RandomPlayer(Color.RED), RandomPlayer(Color.BLUE)], vps_to_win=4)
    game.play()

    winner = game.winning_color()
    winner_vps = get_actual_victory_points(game.state, winner)
    assert 4 <= winner_vps < 6
def test_create_sample_speed(benchmark):
    """Benchmark ``create_sample`` on a four-player game advanced by 30 ticks."""
    seat_colors = (Color.RED, Color.BLUE, Color.WHITE, Color.ORANGE)
    players = [SimplePlayer(seat_color) for seat_color in seat_colors]
    game = Game(players)
    for _ in range(30):
        game.play_tick()

    sample = benchmark(create_sample, game, players[1].color)

    # The sample must be a non-empty dict.
    assert isinstance(sample, dict)
    assert len(sample) > 0
def test_play_year_of_plenty_not_enough_resources():
    """Year of plenty raises ``ValueError`` when the bank's resource deck is all zeros."""
    players = [SimplePlayer(Color.RED), SimplePlayer(Color.BLUE)]
    acting_color = players[0].color
    game = Game(players)

    # Zero out the bank, then hand the acting player the development card.
    game.state.resource_freqdeck = [0] * 5
    player_deck_replenish(game.state, acting_color, YEAR_OF_PLENTY)

    year_of_plenty_action = Action(
        acting_color,
        ActionType.PLAY_YEAR_OF_PLENTY,
        [ORE, WHEAT],
    )
    with pytest.raises(ValueError):  # not enough cards in bank
        game.execute(year_of_plenty_action)
def test_copy():
    """Play 30 ticks, copy the game, and check the copy serializes the same but is a distinct object."""
    seat_colors = (Color.RED, Color.BLUE, Color.WHITE, Color.ORANGE)
    game = Game([SimplePlayer(seat_color) for seat_color in seat_colors])
    for _ in range(30):
        game.play_tick()

    game_copy = game.copy()

    original_json = json.dumps(game, cls=GameEncoder)
    copy_json = json.dumps(game_copy, cls=GameEncoder)
    assert original_json == copy_json
    assert game_copy != game
def test_second_placement_takes_cards_from_bank():
    """The bank's resource deck shrinks by the time the first ROLL becomes playable."""
    seat_colors = (Color.RED, Color.BLUE, Color.WHITE, Color.ORANGE)
    game = Game([SimplePlayer(seat_color) for seat_color in seat_colors])

    full_bank = 19 * 5
    assert sum(game.state.resource_freqdeck) == full_bank

    # Tick until some playable action is a ROLL.
    while all(
        action.action_type != ActionType.ROLL
        for action in game.state.playable_actions
    ):
        game.play_tick()

    assert sum(game.state.resource_freqdeck) < full_bank
def test_end_turn_goes_to_next_player(fake_roll_dice):
    """After ROLL and END_TURN by the first color, the second color is prompted to roll."""
    fake_roll_dice.return_value = (1, 2)  # not a 7
    game = Game([SimplePlayer(Color.RED), SimplePlayer(Color.BLUE)])

    # Tick until some playable action is a ROLL.
    while all(
        action.action_type != ActionType.ROLL
        for action in game.state.playable_actions
    ):
        game.play_tick()

    first_color, second_color = game.state.colors[0], game.state.colors[1]
    roll_by_first = Action(first_color, ActionType.ROLL, None)
    end_turn_by_first = Action(first_color, ActionType.END_TURN, None)

    # The first color is up and can only roll.
    assert game.state.current_prompt == ActionPrompt.PLAY_TURN
    assert game.state.current_color() == first_color
    assert game.state.playable_actions == [roll_by_first]

    # Rolling keeps the turn with the same color and marks it as having rolled.
    game.execute(Action(first_color, ActionType.ROLL, None))
    assert game.state.current_prompt == ActionPrompt.PLAY_TURN
    assert game.state.current_color() == first_color
    assert player_has_rolled(game.state, first_color)
    assert end_turn_by_first in game.state.playable_actions

    # Ending the turn hands over to the second color with the roll flags cleared.
    game.execute(Action(first_color, ActionType.END_TURN, None))
    assert game.state.current_prompt == ActionPrompt.PLAY_TURN
    assert game.state.current_color() == second_color
    assert not player_has_rolled(game.state, first_color)
    assert not player_has_rolled(game.state, second_color)
    assert game.state.playable_actions == [
        Action(second_color, ActionType.ROLL, None)
    ]
def test_to_json_speed(benchmark):
    """Benchmark JSON-encoding a fresh four-player game with ``GameEncoder``."""
    seat_colors = (Color.RED, Color.BLUE, Color.ORANGE, Color.WHITE)
    game = Game([SimplePlayer(seat_color) for seat_color in seat_colors])

    encoded = benchmark(json.dumps, game, cls=GameEncoder)

    assert isinstance(encoded, str)
def test_copy_speed(benchmark):
    """Benchmark ``Game.copy`` on a fresh four-player game; the copy keeps the seed."""
    seat_colors = (Color.RED, Color.BLUE, Color.ORANGE, Color.WHITE)
    game = Game([SimplePlayer(seat_color) for seat_color in seat_colors])

    copied = benchmark(game.copy)

    assert copied.seed == game.seed
def reset(self):
    """Start a new game and return the first observation for ``p0``.

    Creates a BLUE ``Player`` (stored as ``self.p0``) and a RED
    ``RandomPlayer``, stores the new ``Game`` on ``self.game``, calls
    ``self._advance_until_p0_decision()``, and returns
    ``create_sample_vector`` for ``p0``'s color.
    """
    learner = Player(Color.BLUE)
    opponents = [RandomPlayer(Color.RED)]
    self.game = Game(players=[learner, *opponents])
    self.p0 = learner

    self._advance_until_p0_decision()

    return create_sample_vector(self.game, self.p0.color)
def play_batch_core(num_games, players, game_config, accumulators=None):
    """Lazily play ``num_games`` games, yielding each ``Game`` after it is played.

    This is a generator: nothing runs until it is iterated, and the
    ``after_all`` hooks only fire once the generator has been exhausted.

    Args:
        num_games: Number of games to play.
        players: Players reused for every game; ``reset_state()`` is called
            on each of them before every game.
        game_config: Object providing ``catan_map``, ``discard_limit`` and
            ``vps_to_win``. A ``catan_map`` equal to ``"MINI"`` selects
            ``MINI_MAP_TEMPLATE``; anything else uses ``BASE_MAP_TEMPLATE``.
        accumulators: Optional accumulators forwarded to ``Game.play``.
            Those that are ``SimulationAccumulator`` instances additionally
            get ``before_all()`` / ``after_all()`` called around the batch.
            ``None`` (the default) means no accumulators.

    Yields:
        Each ``Game`` after ``game.play(accumulators)`` has returned.
    """
    # A ``None`` sentinel replaces the former mutable ``[]`` default, which
    # was one list object shared by every call that omitted the argument.
    if accumulators is None:
        accumulators = []

    for accumulator in accumulators:
        if isinstance(accumulator, SimulationAccumulator):
            accumulator.before_all()

    for _ in range(num_games):
        for player in players:
            player.reset_state()

        # A new map is built for every game.
        catan_map = (
            CatanMap(MINI_MAP_TEMPLATE)
            if game_config.catan_map == "MINI"
            else CatanMap(BASE_MAP_TEMPLATE)
        )
        game = Game(
            players,
            discard_limit=game_config.discard_limit,
            vps_to_win=game_config.vps_to_win,
            catan_map=catan_map,
        )
        game.play(accumulators)
        yield game

    for accumulator in accumulators:
        if isinstance(accumulator, SimulationAccumulator):
            accumulator.after_all()
def test_play_many_games():
    """Play 10 random games and check each color's victory points add up."""
    seat_colors = (Color.RED, Color.BLUE, Color.WHITE, Color.ORANGE)
    for _ in range(10):  # play 10 games
        game = Game([RandomPlayer(seat_color) for seat_color in seat_colors])
        game.play()

        # Recompute each color's points from its parts and compare with the state.
        for color in game.state.colors:
            num_cities = len(
                get_player_buildings(game.state, color, BuildingType.CITY)
            )
            num_settlements = len(
                get_player_buildings(game.state, color, BuildingType.SETTLEMENT)
            )
            has_longest_road = get_longest_road_color(game.state) == color
            has_largest_army = get_largest_army(game.state)[0] == color
            vp_cards = get_dev_cards_in_hand(game.state, color, VICTORY_POINT)

            expected_vps = (
                num_settlements
                + 2 * num_cities
                + 2 * has_longest_road
                + 2 * has_largest_army
                + vp_cards
            )
            assert expected_vps == get_actual_victory_points(game.state, color)
def get_feature_ordering(num_players=4):
    """Return the sorted feature names of a sample for a ``num_players`` game.

    The ordering is computed once per distinct ``num_players`` by building a
    throwaway game of ``SimplePlayer``s and sorting the keys of
    ``create_sample`` for the first player.

    The previous implementation stored the result in the single module-level
    ``FEATURE_ORDERING`` regardless of ``num_players``, so whichever player
    count was requested first was returned for every later call. Results are
    now cached per player count.

    Args:
        num_players: How many of the four template players to keep
            (the list is sliced with ``[:num_players]``).

    Returns:
        The cached list of feature names. The same list object is returned
        on repeated calls, so callers should not mutate it.
    """
    global FEATURE_ORDERING

    # The per-count cache lives on the function object so that this block
    # does not require a new module-level name.
    orderings = get_feature_ordering.__dict__.setdefault("_orderings_by_count", {})

    if num_players not in orderings:
        players = [
            SimplePlayer(Color.RED),
            SimplePlayer(Color.BLUE),
            SimplePlayer(Color.WHITE),
            SimplePlayer(Color.ORANGE),
        ][:num_players]
        game = Game(players)
        sample = create_sample(game, players[0].color)
        orderings[num_players] = sorted(sample.keys())

    if FEATURE_ORDERING is None:
        # Still fill the legacy module-level cache with the first computed
        # ordering, for code that reads ``FEATURE_ORDERING`` directly.
        FEATURE_ORDERING = orderings[num_players]

    return orderings[num_players]
def decide(self, game: Game, playable_actions):
    """Pick an action by scoring each candidate with playouts.

    For each move, will run N playouts, get statistics, and save into
    replay buffer. Every M decisions, will:
        - flush replay buffer to disk (for offline experiments)
        - report progress on games thus far to TensorBoard (tf.summary module)
        - update model by choosing L random samples from replay buffer
            and train model. do we need stability check? i think not.
            and override model path.

    Decision V1 looks like, predict and choose the one that creates biggest
    'distance' against enemies. Actually this is the same as maximizing wins.

    Decision V2 looks the same as V1, but minimaxed some turns in the future.

    Args:
        game: Current game; it is copied before any candidate is executed.
        playable_actions: Indexable collection of candidate actions.

    Returns:
        The only action when there is exactly one, otherwise the action at
        the ``np.argmax`` index of the collected scores.
    """
    if len(playable_actions) == 1:  # this avoids imbalance (if policy-learning)
        return playable_actions[0]

    start = time.time()

    # Run MCTS playouts for each possible action, save results for training.
    samples = []
    scores = []
    print(playable_actions)
    for action in playable_actions:
        print("Considering", action)
        # Apply the candidate on a copy so the real game is not mutated.
        action_applied_game_copy = game.copy()
        action_applied_game_copy.execute(action)
        sample = create_sample_vector(action_applied_game_copy, self.color)
        samples.append(sample)

        if TRAIN:
            # Save snapshots from the perspective of each player (more training!)
            counter = run_playouts(action_applied_game_copy, NUM_PLAYOUTS)
            # Normalize each counter value by the number of playouts.
            mcts_labels = {k: v / NUM_PLAYOUTS for k, v in counter.items()}
            DATA_LOGGER.consume(action_applied_game_copy, mcts_labels)

            # This player's score for the candidate (0 if absent from labels).
            scores.append(mcts_labels.get(self.color, 0))

    # TODO: if M step, do all 4 things.
    if TRAIN and self.step % FLUSH_EVERY == 0:
        self.update_model_and_flush_samples()

    # scores = get_model().call(tf.convert_to_tensor(samples))
    # NOTE(review): `scores` is only appended to under TRAIN, and the model
    # call above is commented out, so with TRAIN falsy `scores` appears to
    # stay empty and np.argmax would raise ValueError on an empty sequence
    # -- confirm whether non-training use is expected to work.
    best_idx = np.argmax(scores)
    best_action = playable_actions[best_idx]

    if TRAIN:
        print("Decision took:", time.time() - start)
    self.step += 1
    return best_action
def test_serialization():
    """A game encoded with ``GameEncoder`` decodes to JSON with the expected top-level types."""
    seat_colors = (Color.RED, Color.BLUE, Color.WHITE, Color.ORANGE)
    game = Game(players=[SimplePlayer(seat_color) for seat_color in seat_colors])

    decoded = json.loads(json.dumps(game, cls=GameEncoder))

    # Loosely assert looks like expected
    expected_types = {
        "robber_coordinate": list,
        "tiles": list,
        "edges": list,
        "nodes": dict,
        "actions": list,
    }
    for key, expected_type in expected_types.items():
        assert isinstance(decoded[key], expected_type)
def decide(self, game: Game, playable_actions: Iterable[Action]):
    """Delegate the decision to the parent class and log a labelled record.

    Args:
        game (Game): complete game state. read-only.
        playable_actions (Iterable[Action]): options to choose from
    Return:
        action (Action): the action chosen by ``super().decide``
    """
    chosen_action = super().decide(game, playable_actions)

    # Log simple dataset of simple features and MCTS Score
    playout_results = run_playouts(game.copy(), NUM_SIMULATIONS)
    record = simple_feature_vector(game, self.color)
    record["LABEL"] = playout_results[self.color] / float(NUM_SIMULATIONS)
    RECORDS.append(record)

    return chosen_action
def test_execute_action_on_copies_doesnt_conflict():
    """The same action can be executed on two separate copies and then on the original."""
    seat_colors = (Color.RED, Color.BLUE, Color.WHITE, Color.ORANGE)
    game = Game([SimplePlayer(seat_color) for seat_color in seat_colors])
    first_color = game.state.colors[0]
    game.execute(Action(first_color, ActionType.BUILD_SETTLEMENT, 0))

    build_road = Action(first_color, ActionType.BUILD_ROAD, (0, 1))

    # Execute on two independent copies first, then on the original game.
    for _ in range(2):
        game_copy = game.copy()
        game_copy.execute(build_road)
    game.execute(build_road)
def decide(self, game: Game, playable_actions):
    """Choose randomly among the actions that yield the most actual victory points.

    Each candidate is executed on a copy of ``game`` and this player's
    ``<key>_ACTUAL_VICTORY_POINTS`` entry is read from the copy's state.
    A lone playable action is returned without any simulation.
    """
    if len(playable_actions) == 1:
        return playable_actions[0]

    top_value = float("-inf")
    top_actions = []
    for candidate in playable_actions:
        simulated = game.copy()
        simulated.execute(candidate)

        key = player_key(simulated.state, self.color)
        value = simulated.state.player_state[f"{key}_ACTUAL_VICTORY_POINTS"]

        if value > top_value:
            # Strictly better: restart the list of ties.
            top_value, top_actions = value, [candidate]
        elif value == top_value:
            top_actions.append(candidate)

    return random.choice(top_actions)
def test_rolling_a_seven_triggers_default_discard_limit(fake_roll_dice):
    """A rolled seven makes a player holding 9 cards discard down to 5."""
    fake_roll_dice.return_value = (1, 6)
    players = [SimplePlayer(Color.RED), SimplePlayer(Color.BLUE)]
    game = Game(players)

    # Tick until some playable action is a ROLL.
    while all(
        action.action_type != ActionType.ROLL
        for action in game.state.playable_actions
    ):
        game.play_tick()

    # Top the second player up to exactly nine cards.
    discarder = players[1].color
    until_nine = 9 - player_num_resource_cards(game.state, discarder)
    player_deck_replenish(game.state, discarder, WHEAT, until_nine)
    assert player_num_resource_cards(game.state, discarder) == 9

    game.play_tick()  # should be player 0 rolling.

    # The only playable action is now the discard.
    assert len(game.state.playable_actions) == 1
    assert game.state.playable_actions == [
        Action(discarder, ActionType.DISCARD, None)
    ]

    game.play_tick()
    assert player_num_resource_cards(game.state, discarder) == 5
def decide(self, game: Game, playable_actions):
    """Run ``self.num_simulations`` simulations from a root node and return its best action.

    When ``self.prunning`` is truthy the candidates come from
    ``list_prunned_actions(game)`` instead of ``playable_actions``. A single
    candidate is returned immediately without building a tree.
    """
    candidates = playable_actions
    if self.prunning:
        candidates = list_prunned_actions(game)
    if len(candidates) == 1:
        return candidates[0]

    start = time.time()
    root = StateNode(self.color, game.copy(), None, self.prunning)
    for _ in range(self.num_simulations):
        root.run_simulation()

    # The report counts the actions passed in, not the pruned candidates.
    print(
        f"{str(self)} took {time.time() - start} secs to decide {len(playable_actions)}"
    )

    return root.choose_best_action()
def decide(self, game: Game, playable_actions):
    """Return the first element of ``self.alphabeta``'s result, with shortcuts.

    Shortcuts, in order:
      * if ``self.get_actions(game)`` has exactly one entry, return it;
      * if ``self.epsilon`` is set and a random draw falls below it, return
        a random element of ``playable_actions``.

    Otherwise ``self.alphabeta`` is run on a copy of the game to
    ``self.depth`` with a deadline ``MAX_SEARCH_TIME_SECS`` from now.
    """
    actions = self.get_actions(game)
    if len(actions) == 1:
        return actions[0]

    explore = self.epsilon is not None and random.random() < self.epsilon
    if explore:
        return random.choice(playable_actions)

    start = time.time()
    deadline = start + MAX_SEARCH_TIME_SECS

    # Debug-tree root, labelled with the number of actions recorded so far.
    node = DebugStateNode(str(len(game.state.actions)), self.color)

    result = self.alphabeta(
        game.copy(),
        self.depth,
        float("-inf"),
        float("inf"),
        deadline,
        node,
    )
    return result[0]
def after(self, game: Game):
    """Add a finished game's per-color statistics to the running totals.

    Games for which ``winning_color()`` is ``None`` are skipped entirely.
    Otherwise each color's cities, settlements, longest-road flag,
    largest-army flag and victory-point cards are added to the matching
    counters, and ``self.num_games`` is incremented once.
    """
    if game.winning_color() is None:
        return  # throw away data

    state = game.state
    for color in state.colors:
        city_count = len(get_player_buildings(state, color, BuildingType.CITY))
        settlement_count = len(
            get_player_buildings(state, color, BuildingType.SETTLEMENT)
        )
        holds_longest = get_longest_road_color(state) == color
        holds_largest = get_largest_army(state)[0] == color
        vp_cards = get_dev_cards_in_hand(state, color, VICTORY_POINT)

        self.cities[color] += city_count
        self.settlements[color] += settlement_count
        self.longest[color] += holds_longest
        self.largest[color] += holds_largest
        self.devvps[color] += vp_cards

    self.num_games += 1
def decide(self, game: Game, playable_actions):
    """Return the action whose playouts count the most wins for this player.

    Each candidate is executed on a copy of ``game``, then
    ``self.num_playouts`` playouts are run from there. Ties keep the earliest
    candidate. A lone playable action is returned without any playouts.
    """
    if len(playable_actions) == 1:
        return playable_actions[0]

    start = time.time()
    # num_playouts = PLAYOUTS_BUDGET // len(playable_actions)
    num_playouts = self.num_playouts

    best_action, max_wins = None, None
    for candidate in playable_actions:
        simulated = game.copy()
        simulated.execute(candidate)

        wins = run_playouts(simulated, num_playouts)[self.color]
        if max_wins is None or wins > max_wins:
            best_action, max_wins = candidate, wins

    print(
        f"Greedy took {time.time() - start} secs to decide "
        + f"{len(playable_actions)} at {num_playouts} per action"
    )
    return best_action
def test_seven_cards_dont_trigger_discarding(fake_roll_dice):
    """A rolled seven does not prompt a discard from a player holding exactly 7 cards."""
    fake_roll_dice.return_value = (1, 6)
    players = [SimplePlayer(Color.RED), SimplePlayer(Color.BLUE)]

    # Play initial build phase
    game = Game(players)
    while all(
        action.action_type != ActionType.ROLL
        for action in game.state.playable_actions
    ):
        game.play_tick()

    # Top the second player up to exactly seven cards.
    holder = players[1].color
    until_seven = 7 - player_num_resource_cards(game.state, holder)
    player_deck_replenish(game.state, holder, WHEAT, until_seven)
    assert player_num_resource_cards(game.state, holder) == 7

    game.play_tick()  # should be player 0 rolling.

    assert all(
        action.action_type != ActionType.DISCARD
        for action in game.state.playable_actions
    )
def test_initial_build_phase():
    """The first eight actions are settlement/road pairs in first, other, other, first order."""
    players = [SimplePlayer(Color.RED), SimplePlayer(Color.BLUE)]
    game = Game(players)

    # Record every action until some playable action is a ROLL.
    actions = []
    while all(
        action.action_type != ActionType.ROLL
        for action in game.state.playable_actions
    ):
        actions.append(game.play_tick())

    p0_color = game.state.colors[0]

    # (expected action type, whether the first color performs it)
    expected_sequence = [
        (ActionType.BUILD_SETTLEMENT, True),
        (ActionType.BUILD_ROAD, True),
        (ActionType.BUILD_SETTLEMENT, False),
        (ActionType.BUILD_ROAD, False),
        (ActionType.BUILD_SETTLEMENT, False),
        (ActionType.BUILD_ROAD, False),
        (ActionType.BUILD_SETTLEMENT, True),
        (ActionType.BUILD_ROAD, True),
    ]
    for index, (expected_type, by_first_color) in enumerate(expected_sequence):
        taken = actions[index]
        assert taken.action_type == expected_type
        if by_first_color:
            assert taken.color == p0_color
        else:
            assert taken.color != p0_color

    assert game.state.current_prompt == ActionPrompt.PLAY_TURN
    assert game.state.current_color() == p0_color

    player_state = game.state.player_state
    assert player_state["P0_ACTUAL_VICTORY_POINTS"] == 2
    assert player_state["P1_ACTUAL_VICTORY_POINTS"] == 2
    assert player_state["P0_VICTORY_POINTS"] == 2
    assert player_state["P1_VICTORY_POINTS"] == 2

    # assert there are 4 houses and 4 roads
    settlement_count = sum(
        1
        for building in game.state.board.buildings.values()
        if building[1] == BuildingType.SETTLEMENT
    )
    assert settlement_count == 4

    # assert should be house-road pairs, or together
    paths = game.state.board.continuous_roads_by_player(players[0].color)
    assert len(paths) == 1 or (
        len(paths) == 2 and len(paths[0]) == 1 and len(paths[1]) == 1
    )

    # assert should have resources from last house.
    # can only assert <= 3 b.c. player might place on a corner desert
    assert player_num_resource_cards(game.state, players[0].color) <= 3
    assert player_num_resource_cards(game.state, players[1].color) <= 3
def test_can_play_for_a_bit():
    """Smoke test: ten ticks of a two-player game run without raising."""
    game = Game([SimplePlayer(Color.RED), SimplePlayer(Color.BLUE)])

    # assert no exception thrown
    ticks_to_play = 10
    for _ in range(ticks_to_play):
        game.play_tick()
size = 0 objects = [obj] while objects: need_referents = [] for obj in objects: if not isinstance(obj, BLACKLIST) and id(obj) not in seen_ids: seen_ids.add(id(obj)) size += sys.getsizeof(obj) need_referents.append(obj) objects = get_referents(*need_referents) return size game = Game([ RandomPlayer(Color.RED), RandomPlayer(Color.BLUE), RandomPlayer(Color.WHITE), RandomPlayer(Color.ORANGE), ]) game.play() print(sys.getsizeof(game)) print(getsize(game)) print(game) start = time.time() copy.deepcopy(game) end = time.time() print("copy.deepcopy(game) took", end - start, "seconds") start = time.time() game.copy() end = time.time()