def cure_virus(simulation: Simulation, cure_card_combination: List[City], player: Character):
    choices = {ChooseCard(player, card) for card in cure_card_combination}
    assert choices == set(simulation.get_possible_actions())
    for card in cure_card_combination:
        na = ChooseCard(player, card)
        assert na in simulation.get_possible_actions()
        simulation.step(na)
def create_less_random_simulation(start_player: Character = Character.SCIENTIST):
    state = State(player_count=2)
    player_state = PlayerState()
    player_state.clear_cards()
    state.players = {
        start_player: PlayerState(),
        Character.RESEARCHER if start_player == Character.SCIENTIST else Character.SCIENTIST: PlayerState(),
    }
    state.active_player = start_player

    simulation = Simulation(player_count=2)
    simulation.state = state
    return simulation
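# Hypothetical usage sketch tying the two helpers above together. The concrete
# city cards and the DiscoverCure-then-ChooseCard flow are assumptions drawn
# from the other tests in this section, not part of the real suite.
def test_cure_virus_helper_sketch():
    player = Character.SCIENTIST
    simulation = create_less_random_simulation(start_player=player)
    blue_cards = [City.ATLANTA, City.CHICAGO, City.MONTREAL, City.WASHINGTON]  # placeholder hand
    hand = simulation.state.players[player]
    hand.clear_cards()
    for card in blue_cards:
        hand.add_card(card)
    simulation.step(DiscoverCure(target_virus=Virus.BLUE))
    cure_virus(simulation, blue_cards, player)
    assert simulation.state.cures[Virus.BLUE]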
def test_state_copy():
    simulation = Simulation()
    state_copy = deepcopy(simulation.state.internal_state)
    assert state_copy == simulation.state.internal_state
    TestGeneral.walk_trough_round(simulation)
    assert state_copy != simulation.state.internal_state
    simulation.state.internal_state = state_copy
    TestGeneral.walk_trough_round(simulation)
def test_infect_city_with_eradicated_virus():
    simulation = Simulation()
    # cheat and pretend virus is eradicated
    simulation.state.cures[Virus.BLUE] = True
    simulation.state.cubes[Virus.BLUE] = 24
    simulation.state.cities[City.ATLANTA].viral_state[Virus.BLUE] = 0
    assert simulation.state.cities[City.ATLANTA].viral_state[Virus.BLUE] == 0
    simulation.state.infect_city(City.ATLANTA, times=2)
    assert simulation.state.cities[City.ATLANTA].viral_state[Virus.BLUE] == 0
class GuiSimulation:
    def __init__(self):
        self.simulation = Simulation()
        self._moves: Dict[str, ActionInterface] = {}
        self.get_possible_moves()

    def perform_action(self, command_string: str = None) -> State:
        if command_string is None:
            return self.simulation.state
        self.simulation.step(self._moves[command_string])
        moves = self.simulation.get_possible_actions()
        while not moves:
            # advance automatic phases until the player has actions again
            moves = self.simulation.step(None)
        print(self.simulation.state.report())
        return self.simulation.state

    def get_possible_moves(self) -> Set[str]:
        self._moves = {
            move.to_command(): move
            for move in self.simulation.get_possible_actions()
        }
        return set(self._moves.keys())
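# Illustrative command-line driver for GuiSimulation. Only perform_action and
# get_possible_moves come from the class above; the prompt/loop shape is an
# assumption about how the wrapper is meant to be driven.
if __name__ == "__main__":
    gui = GuiSimulation()
    while True:
        commands = gui.get_possible_moves()
        print("\n".join(sorted(commands)))
        chosen = input("move> ")
        if chosen in commands:
            gui.perform_action(chosen)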
def test_deterministic_simulation():
    simulation = Simulation(
        characters={Character.RESEARCHER, Character.CONTINGENCY_PLANNER},
        player_deck_shuffle_seed=10,
        infect_deck_shuffle_seed=30,
        epidemic_shuffle_seed=12,
    )
    before_state_copy = deepcopy(simulation.state.internal_state)
    TestGeneral.walk_trough_round(simulation)
    assert before_state_copy != simulation.state.internal_state

    # reset simulation
    after_state_copy = deepcopy(simulation.state.internal_state)
    simulation.state.internal_state = before_state_copy
    TestGeneral.walk_trough_round(simulation)
    assert simulation.state.internal_state == after_state_copy
def test_quiet_event():
    simulation = Simulation()
    active_player = simulation.state.active_player
    simulation.state.players[active_player].add_card(EventCard.ONE_QUIET_NIGHT)
    event = OneQuietNight(player=active_player)
    actions = simulation.get_possible_actions()
    assert event in actions
    simulation.step(event)
    assert simulation.state.one_quiet_night
    assert EventCard.ONE_QUIET_NIGHT not in simulation.state.players[active_player].cards
    simulation.state.phase = Phase.INFECTIONS
    simulation.step(None)
    assert simulation.state.phase == Phase.ACTIONS
def test_dissect_state_copy():
    simulation = Simulation()
    state = simulation.state.internal_state
    state_copy = deepcopy(simulation.state.internal_state)

    infection_deck_mcopy = state.infection_deck
    assert state_copy.infection_deck == infection_deck_mcopy
    state.infection_deck = []
    assert state_copy.infection_deck != []
    assert state_copy.infection_deck == infection_deck_mcopy

    state.cities[City.ATLANTA].remove_research_station()
    assert state_copy.cities[City.ATLANTA].has_research_station()
    assert not state.cities[City.ATLANTA].has_research_station()

    semi_random_player = list(state.players.keys())[0]
    state.players[semi_random_player].clear_cards()
    assert state.players[semi_random_player].cards != {}
from pandemic.learning.mcts import Mcts
from pandemic.learning.mcts_state import PandemicMctsState
from pandemic.learning.sp_mcts import SpMcts
from pandemic.simulation.model.actions import DriveFerry, DiscoverCure, ChooseCard
from pandemic.simulation.model.enums import Character
from pandemic.simulation.simulation import Simulation
from pandemic.learning.easy_mode import easy_state

env = Simulation(
    characters={Character.RESEARCHER, Character.CONTINGENCY_PLANNER},
    player_deck_shuffle_seed=5,
    infect_deck_shuffle_seed=10,
    epidemic_shuffle_seed=12,
)
action_filter = lambda action: isinstance(action, (DriveFerry, DiscoverCure, ChooseCard))
env.state.internal_state = easy_state
print(easy_state.active_player)

initial_state = PandemicMctsState(env, easy_state)
mcts = SpMcts(initial_state, time_limit=30000, exploration_constant=0.06, D=0.1, select_treshold=100)
next_state = initial_state
# viz = Visualization(env.state.internal_state)
bestAction, next_state = mcts.search()
next_action = next_state._possible_actions[bestAction]
print(next_action)
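# Hypothetical continuation of the script above: apply the chosen action and
# inspect the result. report() is the method used by GuiSimulation elsewhere in
# this repo; chaining further searches from the new state is an assumption
# about the intended workflow, not something the script shows.
env.step(next_action)
print(env.state.report())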
class Pandemic(gym.Env):
    def __init__(
        self,
        num_epidemic_cards: int = 5,
        player_count: int = PLAYER_COUNT,
        characters: Set[int] = frozenset(),
        player_deck_shuffle_seed=None,
        infect_deck_shuffle_seed=None,
        epidemic_shuffle_seed=None,
    ):
        self._simulation = Simulation(
            num_epidemic_cards,
            player_count,
            characters,
            player_deck_shuffle_seed,
            infect_deck_shuffle_seed,
            epidemic_shuffle_seed,
        )
        self._action_lookup, self.action_space = self._encode_possible_actions(
            self._simulation.get_possible_actions())
        self.observation_space = self._get_obs()
        self._steps = 0
        self._illegal_actions = 0
        self.performed_actions_reward = list()

    def reset(self):
        # TODO: smarter reset
        self._simulation.reset()
        self._action_lookup, self.action_space = self._encode_possible_actions(
            self._simulation.get_possible_actions())
        self.observation_space = self._get_obs()
        self._steps = 0
        self._illegal_actions = 0
        self.performed_actions_reward.clear()

    def render(self, mode="human"):
        for action, reward in self.performed_actions_reward:
            print("%s %s" % (action, reward))
        print("steps: ", self._steps)
        print("illegal actions: ", self._illegal_actions)
        print("cures: ", self._simulation.state.cures)
        print("outbreaks: ", self._simulation.state.outbreaks)

    def step(self, action: int):
        action_statement = self._action_lookup.get(action, None)
        print(action_statement)
        if action_statement is None:
            self._illegal_actions += 1
            return self.observation_space, -1, False, {"steps": self._steps}
        if action == 0:
            self._simulation.step(None)
        else:
            self._steps += 1
            self._simulation.step(action_statement)
        reward = self._get_reward(self._simulation.state.internal_state)
        self.performed_actions_reward.append((action_statement, reward))
        self._action_lookup, self.action_space = self._encode_possible_actions(
            self._simulation.get_possible_actions())
        self.observation_space = self._get_obs()
        done = self._get_done()
        # observation, reward, done, info
        return self.observation_space, reward, done, {"steps": self._steps}

    def get_state_copy(self):
        return deepcopy(self._simulation.state.internal_state)

    def set_state(self, value):
        self._simulation.state.internal_state = value
        self._action_lookup, self.action_space = self._encode_possible_actions(
            self._simulation.get_possible_actions())
        self.observation_space = self._get_obs()

    def _get_done(self):
        return self._simulation.state.game_state != GameState.RUNNING

    @staticmethod
    def _get_reward(state) -> float:
        # try to come up with sensible reward
        # extremely basic reward each cure -> += 0.25
        # each card of same color for player uncured -> += 0.1
        # each turn -> += 0.001
        card_color_reward = sum(
            sum(
                pow(count, 0.01) - 1
                for color, count in p.city_colors.items()
                if not state.cures[color] and count > 1)
            for p in state.players.values())
        cure_reward = sum(state.cures.values()) * 100
        step_reward = state.steps * 0.001
        # each outbreak -> -x^1.5/25
        outbreak_reward = math.pow(state.outbreaks, 1.5) / 25 * -1
        reward = card_color_reward + cure_reward + outbreak_reward + step_reward
        return float(reward)

    @staticmethod
    def _encode_possible_actions(
        possible_actions: List[ActionInterface],
    ) -> Tuple[Dict[int, ActionInterface], np.ndarray]:
        if possible_actions is None or possible_actions == []:
            space = np.zeros(ACTION_SPACE_DIM)
            np.put(space, 0, 1)
            return {0: "Wait"}, space
        lookup = dict()
        features = [0] * ACTION_SPACE_DIM
        bump_dict = defaultdict(int)
        for action in possible_actions:
            Pandemic._insert_action(features, lookup, bump_dict, action)
        return lookup, np.array(features)

    @staticmethod
    def _insert_action(llm, feature_actions, bump_dict, action):
        # llm: flat feature vector, feature_actions: index -> action lookup,
        # bump_dict: shifts actions that share a feature index into free slots
        idx, value = action.feature
        bidx = idx + bump_dict[idx]
        bump_dict[idx] += 1
        feature_actions[bidx] = action
        llm[bidx] = value
        return bidx, value

    @staticmethod
    def __insert_with_shift(feature_actions, index, value):
        if feature_actions.get(index, None) is None:
            feature_actions[index] = value
            return index
        else:
            return Pandemic.__insert_with_shift(feature_actions, index + 1, value)

    def _get_obs(self) -> np.array:
        state = self._simulation.state
        phase = state.phase
        # normalized outbreaks 1 = bad
        outbreaks = state.outbreaks / 8
        # num_epidemics bigger = badder
        epidemics = state.infection_rate_marker / 7
        # actions left smaller = less
        actions_left = state.actions_left / 4
        # player deck size 0 = game over
        player_deck = len(state.player_deck)
        # size infection discard pile
        infection_discard = len(state.infection_discard_pile)
        # research stations left
        research_stations_left = state.research_stations
        # active player
        active_player = state.active_player

        #######
        # combine single features
        ######
        counts_vector = np.array([
            active_player,
            phase,
            outbreaks,
            epidemics,
            actions_left,
            player_deck,
            infection_discard,
            research_stations_left,
        ])

        # city state [ *researchstation *viral_states ]
        city_feature_tuples = [
            tuple([int(city.has_research_station())]) + tuple(city.viral_state.values())
            for city in state.cities.values()
        ]
        city_feature_vector = np.array(list(sum(zip(*city_feature_tuples), ())))

        # player characters + player locations
        player_feature_tuples = [(id, player.city) for id, player in state.players.items()]
        player_feature_vector = np.array(list(sum(zip(*player_feature_tuples), ())))

        # player cards
        hands_feature_vector = np.ndarray.flatten(
            np.array([
                Pandemic.pad_with_zeros(10, np.array(list(player.cards)))
                for player in state.players.values()
            ]))

        # cures
        cures_vector = [int(s) for s in state.cures.values()]

        # list of cubes normalized less is bad
        cubes_stack_vector = np.array([c / 24 for c in state.cubes.values()])

        # list of latest outbreaks ?
        return np.concatenate([
            counts_vector,
            city_feature_vector,
            player_feature_vector,
            hands_feature_vector,
            cubes_stack_vector,
            cures_vector,
        ])

    @staticmethod
    def pad_with_zeros(n, array):
        shape = np.shape(array)
        padded_array = np.zeros(n)
        padded_array[:shape[0]] = array
        return padded_array
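# A rough random-policy rollout sketch against the gym wrapper above. Treating
# the non-zero slots of `action_space` as the legal action ids is an assumption
# drawn from _encode_possible_actions; the seeds are arbitrary placeholders.
import numpy as np

env = Pandemic(player_deck_shuffle_seed=5, infect_deck_shuffle_seed=10, epidemic_shuffle_seed=12)
done = False
while not done:
    legal = np.flatnonzero(env.action_space)
    action = int(np.random.choice(legal)) if legal.size else 0
    observation, reward, done, info = env.step(action)
env.render()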
def test_whole_round():
    simulation = Simulation()
    TestGeneral.walk_trough_round(simulation)
def test_easy_mode_play_trough():
    env = Simulation(
        characters={Character.QUARANTINE_SPECIALIST, Character.SCIENTIST},
        num_epidemic_cards=4,
        player_deck_shuffle_seed=5,
        infect_deck_shuffle_seed=10,
        epidemic_shuffle_seed=12,
    )
    env.state.internal_state = easy_state

    ## test
    env.step(DriveFerry(player=5, destination=48))
    env.step(DriveFerry(player=5, destination=34))
    env.step(DriveFerry(player=5, destination=48))
    env.step(DriveFerry(player=5, destination=2))
    env.step(None)
    env.step(None)
    env.step(None)
    env.step(None)

    env.step(DiscoverCure(target_virus=3))
    env.get_possible_actions()
    env.step(ChooseCard(player=7, card=40))
    env.step(ChooseCard(player=7, card=25))
    env.step(ChooseCard(player=7, card=19))
    env.step(ChooseCard(player=7, card=17))
    env.step(DriveFerry(player=7, destination=48))
    env.step(DriveFerry(player=7, destination=29))
    env.step(DriveFerry(player=7, destination=2))
    env.step(None)
    env.step(None)
    env.step(None)
    env.step(None)

    env.step(DiscoverCure(target_virus=1))
    env.get_possible_actions()
    env.step(ChooseCard(player=5, card=1))
    env.step(ChooseCard(player=5, card=38))
    env.step(ChooseCard(player=5, card=10))
    env.step(ChooseCard(player=5, card=43))
    env.step(ChooseCard(player=5, card=24))
    env.step(DriveFerry(player=5, destination=48))
    env.step(DriveFerry(player=5, destination=29))
    env.step(DriveFerry(player=5, destination=2))
    env.step(None)
    env.step(None)
    env.step(None)
    env.step(None)

    env.step(DriveFerry(player=7, destination=48))
    env.step(DriveFerry(player=7, destination=34))
    env.step(DriveFerry(player=7, destination=48))
    env.step(DriveFerry(player=7, destination=2))
    env.step(None)
    env.step(None)
    env.step(None)
    env.step(None)

    env.step(DriveFerry(player=5, destination=48))
    env.step(DriveFerry(player=5, destination=34))
    env.step(DriveFerry(player=5, destination=48))
    env.step(DriveFerry(player=5, destination=2))
    env.step(None)
    env.step(None)
    env.step(None)
    env.step(None)

    env.step(DiscoverCure(target_virus=2))
    env.get_possible_actions()
    env.step(ChooseCard(player=7, card=42))
    env.step(ChooseCard(player=7, card=4))
    env.step(ChooseCard(player=7, card=44))
    env.step(ChooseCard(player=7, card=13))
    env.step(DriveFerry(player=7, destination=48))
    env.step(DriveFerry(player=7, destination=29))
    env.step(DriveFerry(player=7, destination=2))
    env.step(None)
    env.step(None)
    env.step(None)
    env.step(None)

    env.get_possible_actions()
    env.step(DiscoverCure(target_virus=4))
    env.get_possible_actions()
    env.get_possible_actions()
    env.step(ChooseCard(player=5, card=32))
    env.step(ChooseCard(player=5, card=3))
    env.step(ChooseCard(player=5, card=37))
    env.step(ChooseCard(player=5, card=8))
    env.step(ChooseCard(player=5, card=9))

    assert env.state.game_state == GameState.WIN
def switch_player_card(s, player, x, y):
    # swap card x in the player's hand for card y, putting x back into the deck
    s.players[player].remove_card(x)
    s.players[player].add_card(y)
    s.player_deck = replace_card(s.player_deck, y, x)  # replace_card: helper defined elsewhere (not shown)


def swap_elements(lst, x, y):
    # exchange the positions of x and y inside lst
    index1 = lst.index(x)
    index2 = lst.index(y)
    lst[index1], lst[index2] = lst[index2], lst[index1]


_env = Simulation(
    characters={Character.QUARANTINE_SPECIALIST, Character.SCIENTIST},
    num_epidemic_cards=4,
    player_deck_shuffle_seed=5,
    infect_deck_shuffle_seed=10,
    epidemic_shuffle_seed=12,
)
_state = _env.state.internal_state
_state.active_player = 5

switch_player_card(_state, Character.QUARANTINE_SPECIALIST, 17, 1)
switch_player_card(_state, Character.QUARANTINE_SPECIALIST, 23, 10)
switch_player_card(_state, Character.QUARANTINE_SPECIALIST, 2, 24)
switch_player_card(_state, Character.SCIENTIST, 57, 17)
switch_player_card(_state, Character.SCIENTIST, 56, 40)
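# Illustrative sanity check on the doctored hands (card ids mirror the
# switch_player_card calls above); exporting the result as `easy_state` is an
# assumption about how the tests above obtain their easy-mode starting state.
assert 1 in _state.players[Character.QUARANTINE_SPECIALIST].cards
assert 17 not in _state.players[Character.QUARANTINE_SPECIALIST].cards
assert 40 in _state.players[Character.SCIENTIST].cards
easy_state = _state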