Пример #1
0
def cure_virus(simulation: Simulation, cure_card_combination: List[City],
               player: Character):
    """Discard the given card combination to cure a virus.

    First verifies that the offered actions are exactly the expected
    ChooseCard choices, then plays each card in turn, re-checking that
    the card is still a legal pick before every step.
    """
    expected = {ChooseCard(player, card) for card in cure_card_combination}
    assert set(simulation.get_possible_actions()) == expected
    for city_card in cure_card_combination:
        action = ChooseCard(player, city_card)
        assert action in simulation.get_possible_actions()
        simulation.step(action)
Пример #2
0
def create_less_random_simulation(
        start_player: Character = Character.SCIENTIST):
    """Build a two-player simulation with a deterministic active player.

    The partner character is RESEARCHER when *start_player* is the
    SCIENTIST, otherwise SCIENTIST.

    Fix: the original created a ``player_state`` local and called
    ``clear_cards()`` on it but never used it — the players dict was built
    from fresh ``PlayerState()`` instances anyway. The dead code is removed;
    behavior is unchanged.
    """
    state = State(player_count=2)
    partner = (Character.RESEARCHER
               if start_player == Character.SCIENTIST else Character.SCIENTIST)
    state.players = {
        start_player: PlayerState(),
        partner: PlayerState(),
    }
    state.active_player = start_player

    # Replace the simulation's freshly rolled state with the fixed one above.
    simulation = Simulation(player_count=2)
    simulation.state = state
    return simulation
Пример #3
0
    def test_state_copy():
        """Round-trip internal_state through deepcopy and restore it."""
        simulation = Simulation()

        # Snapshot the pristine state; the copy must compare equal to it.
        snapshot = deepcopy(simulation.state.internal_state)
        assert snapshot == simulation.state.internal_state

        TestGeneral.walk_trough_round(simulation)
        # Playing a round must have mutated only the live state.
        assert snapshot != simulation.state.internal_state

        # Restoring the snapshot leaves the simulation playable again.
        simulation.state.internal_state = snapshot
        TestGeneral.walk_trough_round(simulation)
Пример #4
0
    def test_infect_city_with_eradicated_virus():
        """Infecting a city with an eradicated virus must leave it clean."""
        sim = Simulation()

        # Cheat: the cured flag plus all 24 cubes back in the supply is the
        # eradicated condition for the blue virus.
        sim.state.cures[Virus.BLUE] = True
        sim.state.cubes[Virus.BLUE] = 24
        sim.state.cities[City.ATLANTA].viral_state[Virus.BLUE] = 0

        assert sim.state.cities[City.ATLANTA].viral_state[Virus.BLUE] == 0
        sim.state.infect_city(City.ATLANTA, times=2)
        # Still zero cubes: the infection was a no-op.
        assert sim.state.cities[City.ATLANTA].viral_state[Virus.BLUE] == 0
Пример #5
0
class GuiSimulation:
    """Command-string adapter around a Simulation for a GUI front end."""

    def __init__(self):
        self.simulation = Simulation()
        # Maps command strings to their actions; filled by get_possible_moves().
        self._moves: Dict[str, ActionInterface] = {}
        self.get_possible_moves()

    def perform_action(self, command_string: str = None) -> State:
        """Execute the action behind *command_string*; return the new state.

        A ``None`` command is treated as a query: the current state is
        returned without stepping the simulation.
        """
        if command_string is None:
            return self.simulation.state

        self.simulation.step(self._moves[command_string])
        # Keep stepping through phases that offer the player no choice.
        moves = self.simulation.get_possible_actions()
        while not moves:
            moves = self.simulation.step(None)
        print(self.simulation.state.report())
        return self.simulation.state

    def get_possible_moves(self) -> Set[str]:
        """Refresh the command lookup and return the legal command strings."""
        lookup = {}
        for move in self.simulation.get_possible_actions():
            lookup[move.to_command()] = move
        self._moves = lookup
        return set(self._moves.keys())
Пример #6
0
 def __init__(
     self,
     num_epidemic_cards: int = 5,
     player_count: int = PLAYER_COUNT,
     characters: Set[int] = frozenset(),
     player_deck_shuffle_seed=None,
     infect_deck_shuffle_seed=None,
     epidemic_shuffle_seed=None,
 ):
     """Create the wrapped Simulation and derive the initial spaces.

     The three seed arguments are forwarded verbatim to Simulation;
     presumably ``None`` means an unseeded shuffle — TODO confirm.
     """
     self._simulation = Simulation(
         num_epidemic_cards,
         player_count,
         characters,
         player_deck_shuffle_seed,
         infect_deck_shuffle_seed,
         epidemic_shuffle_seed,
     )
     # Encode the currently legal actions into an index->action lookup
     # plus a fixed-size action-space vector.
     self._action_lookup, self.action_space = self._encode_possible_actions(
         self._simulation.get_possible_actions())
     self.observation_space = self._get_obs()
     # Bookkeeping: legal steps, illegal attempts, (action, reward) history.
     self._steps = 0
     self._illegal_actions = 0
     self.performed_actions_reward = list()
Пример #7
0
 def test_deterministic_simulation():
     """Seeded simulations must replay identically from a restored state."""
     simulation = Simulation(
         characters={Character.RESEARCHER, Character.CONTINGENCY_PLANNER},
         player_deck_shuffle_seed=10,
         infect_deck_shuffle_seed=30,
         epidemic_shuffle_seed=12,
     )
     # Play one round and remember the states on both sides of it.
     snapshot_before = deepcopy(simulation.state.internal_state)
     TestGeneral.walk_trough_round(simulation)
     assert snapshot_before != simulation.state.internal_state
     snapshot_after = deepcopy(simulation.state.internal_state)

     # Rewind and replay: a deterministic engine must land on the same state.
     simulation.state.internal_state = snapshot_before
     TestGeneral.walk_trough_round(simulation)
     assert simulation.state.internal_state == snapshot_after
Пример #8
0
    def test_quiet_event():
        """ONE_QUIET_NIGHT must skip the infection phase exactly once."""
        simulation = Simulation()
        player = simulation.state.active_player

        # Hand the active player the event card so the action becomes legal.
        simulation.state.players[player].add_card(EventCard.ONE_QUIET_NIGHT)

        event = OneQuietNight(player=player)
        assert event in simulation.get_possible_actions()

        simulation.step(event)
        assert simulation.state.one_quiet_night
        # Playing the event consumes the card.
        assert EventCard.ONE_QUIET_NIGHT not in simulation.state.players[
            player].cards

        # Force the infection phase; the quiet night should skip it entirely.
        simulation.state.phase = Phase.INFECTIONS
        simulation.step(None)
        assert simulation.state.phase == Phase.ACTIONS
Пример #9
0
    def test_dissect_state_copy():
        """Verify that deepcopy detaches nested state from the live state."""
        simulation = Simulation()

        state = simulation.state.internal_state
        state_copy = deepcopy(simulation.state.internal_state)

        # Alias the live deck list; after the rebind below the alias still
        # points at the original list object.
        infection_deck_mcopy = state.infection_deck
        assert state_copy.infection_deck == infection_deck_mcopy
        state.infection_deck = []
        # The deep copy must be unaffected by rebinding the live attribute.
        assert state_copy.infection_deck != []
        assert state_copy.infection_deck == infection_deck_mcopy

        # Nested city objects must be independent between copy and original.
        state.cities[City.ATLANTA].remove_research_station()

        assert state_copy.cities[City.ATLANTA].has_research_station()
        assert not state.cities[City.ATLANTA].has_research_station()

        semi_random_player = list(state.players.keys())[0]
        state.players[semi_random_player].clear_cards()
        # NOTE(review): if `cards` is a set, comparing it to the dict literal
        # {} is always True (set never equals dict), which would make this
        # assert vacuous — confirm the intended comparison.
        assert state.players[semi_random_player].cards != {}
Пример #10
0
from pandemic.learning.mcts import Mcts
from pandemic.learning.mcts_state import PandemicMctsState
from pandemic.learning.sp_mcts import SpMcts
from pandemic.simulation.model.actions import DriveFerry, DiscoverCure, ChooseCard
from pandemic.simulation.model.enums import Character
from pandemic.simulation.simulation import Simulation
from pandemic.learning.easy_mode import easy_state

# Driver script: run a single-player MCTS search from the "easy mode" state.
env = Simulation(
    characters={Character.RESEARCHER, Character.CONTINGENCY_PLANNER},
    player_deck_shuffle_seed=5,
    infect_deck_shuffle_seed=10,
    epidemic_shuffle_seed=12,
)


# Restrict consideration to movement, curing and card-selection actions.
# (Fix: was a lambda assigned to a name with a chained isinstance or-chain.)
def action_filter(action):
    return isinstance(action, (DriveFerry, DiscoverCure, ChooseCard))


# Start from the pre-built deterministic state instead of the seeded deal.
env.state.internal_state = easy_state
print(easy_state.active_player)
initial_state = PandemicMctsState(env, easy_state)
mcts = SpMcts(initial_state,
              time_limit=30000,
              exploration_constant=0.06,
              D=0.1,
              select_treshold=100)
# viz = Visualization(env.state.internal_state)
# Fix: removed the dead `next_state = initial_state` assignment — it was
# unconditionally overwritten by the search result on the next line.
best_action, next_state = mcts.search()
next_action = next_state._possible_actions[best_action]
print(next_action)
Пример #11
0
 def __init__(self):
     """Wrap a fresh Simulation and build the initial command lookup."""
     self.simulation = Simulation()
     # Maps command strings to their ActionInterface objects; populated by
     # get_possible_moves().
     self._moves: Dict[str, ActionInterface] = {}
     self.get_possible_moves()
Пример #12
0
class Pandemic(gym.Env):
    """OpenAI-Gym wrapper around the pandemic Simulation engine."""

    def __init__(
        self,
        num_epidemic_cards: int = 5,
        player_count: int = PLAYER_COUNT,
        characters: Set[int] = frozenset(),
        player_deck_shuffle_seed=None,
        infect_deck_shuffle_seed=None,
        epidemic_shuffle_seed=None,
    ):
        """Create the wrapped Simulation and the initial spaces.

        The seed arguments are forwarded verbatim to Simulation; presumably
        ``None`` means an unseeded shuffle — TODO confirm.
        """
        self._simulation = Simulation(
            num_epidemic_cards,
            player_count,
            characters,
            player_deck_shuffle_seed,
            infect_deck_shuffle_seed,
            epidemic_shuffle_seed,
        )
        # Encode the currently legal actions into an index->action lookup
        # plus a fixed-size action-space vector.
        self._action_lookup, self.action_space = self._encode_possible_actions(
            self._simulation.get_possible_actions())
        self.observation_space = self._get_obs()
        # Bookkeeping: legal steps, illegal attempts, (action, reward) log.
        self._steps = 0
        self._illegal_actions = 0
        self.performed_actions_reward = list()

    def reset(self):
        """Reset the underlying simulation and all bookkeeping counters."""
        # TODO: smarter reset
        self._simulation.reset()
        self._action_lookup, self.action_space = self._encode_possible_actions(
            self._simulation.get_possible_actions())
        self.observation_space = self._get_obs()
        self._steps = 0
        self._illegal_actions = 0
        self.performed_actions_reward.clear()

    def render(self, mode="human"):
        """Print the action/reward history and headline game statistics."""
        [print("%s %s" % (a, r)) for a, r in self.performed_actions_reward]
        print("steps: ", self._steps)
        print("illegal actions: ", self._illegal_actions)
        print("cures: ", self._simulation.state.cures)
        print("outbreaks: ", self._simulation.state.outbreaks)
        pass

    def step(self, action: int):
        """Advance the simulation by one encoded action index.

        Returns the gym 4-tuple (observation, reward, done, info). Unknown
        action indices are penalized with reward -1 and leave the simulation
        untouched.
        """
        action_statement = self._action_lookup.get(action, None)
        print(action_statement)
        if action_statement is None:
            # NOTE(review): done is hard-coded to False on this path even if
            # the game already ended — confirm this is intended.
            self._illegal_actions += 1
            return self.observation_space, -1, False, {"steps": self._steps}

        # Index 0 is the "Wait" pseudo-action: advance phases without acting.
        if action == 0:
            self._simulation.step(None)
        else:
            self._steps += 1
            self._simulation.step(action_statement)

        reward = self._get_reward(self._simulation.state.internal_state)
        self.performed_actions_reward.append((action_statement, reward))
        # Re-derive the legal-action encoding for the new game state.
        self._action_lookup, self.action_space = self._encode_possible_actions(
            self._simulation.get_possible_actions())

        self.observation_space = self._get_obs()
        done = self._get_done()
        # observation, reward, done, info
        return self.observation_space, reward, done, {"steps": self._steps}

    def get_state_copy(self):
        """Return a deep copy of the engine's internal state."""
        return deepcopy(self._simulation.state.internal_state)

    def set_state(self, value):
        """Replace the engine state and re-derive both spaces."""
        self._simulation.state.internal_state = value
        self._action_lookup, self.action_space = self._encode_possible_actions(
            self._simulation.get_possible_actions())
        self.observation_space = self._get_obs()

    def _get_done(self):
        """An episode ends as soon as the game leaves the RUNNING state."""
        return self._simulation.state.game_state != GameState.RUNNING

    @staticmethod
    def _get_reward(state) -> float:
        """Heuristic shaping reward computed from the raw internal state.

        NOTE(review): the sketch below predates the code — cures actually
        score 100 each and card colors use ``pow(count, 0.01) - 1``.
        """
        # try to come up with sensible reward
        # extremely basic reward each cure -> += 0.25
        # each card of same color for player uncured -> += 0.1
        # each turn -> += 0.001
        card_color_reward = sum(
            sum(
                pow(count, 0.01) - 1 for color, count in p.city_colors.items()
                if not state.cures[color] and count > 1)
            for p in state.players.values())
        cure_reward = sum(state.cures.values()) * 100
        step_reward = state.steps * 0.001

        # each outbreak -> -x^1.5/25
        outbreak_reward = math.pow(state.outbreaks, 1.5) / 25 * -1
        reward = card_color_reward + cure_reward + outbreak_reward + step_reward
        return float(reward)

    @staticmethod
    def _encode_possible_actions(
        possible_actions: List[ActionInterface],
    ) -> Tuple[Dict[int, ActionInterface], np.ndarray]:
        """Encode legal actions as (index -> action, feature vector).

        With no legal actions, only the "Wait" pseudo-action at index 0 is
        offered.
        """
        if possible_actions is None or possible_actions == []:
            space = np.zeros(ACTION_SPACE_DIM)
            np.put(space, 0, 1)
            return {0: "Wait"}, space
        lookup = dict()
        features = [0] * ACTION_SPACE_DIM
        # Counts how often each base feature index was used so colliding
        # actions get bumped to the next slot.
        bump_dict = defaultdict(int)
        [
            Pandemic._insert_action(features, lookup, bump_dict, action)
            for action in possible_actions
        ]

        return lookup, np.array(features)

    @staticmethod
    def _insert_action(llm, feature_actions, bump_dict, action):
        """Place one action into the feature vector and the lookup table.

        NOTE(review): the parameter names look swapped relative to the call
        site — ``llm`` receives the feature *list* and ``feature_actions``
        the index->action lookup *dict*. Behavior is consistent; only the
        names mislead.
        """
        idx, value = action.feature
        # Shift by the number of actions already occupying this base index.
        bidx = idx + bump_dict[idx]
        bump_dict[idx] += 1
        feature_actions[bidx] = action
        llm[bidx] = value
        return bidx, value

    @staticmethod
    def __insert_with_shift(feature_actions, index, value):
        """Insert value at the first free index at or after *index*.

        NOTE(review): not referenced anywhere in this file — possibly dead.
        """
        if feature_actions.get(index, None) is None:
            feature_actions[index] = value
            return index
        else:
            return Pandemic.__insert_with_shift(feature_actions, index + 1,
                                                value)

    def _get_obs(self) -> np.ndarray:
        """Assemble the flat observation vector from the simulation state."""
        state = self._simulation.state
        phase = state.phase
        # normalized outbreaks 1 = bad
        outbreaks = state.outbreaks / 8
        # num_epidemics bigger = badder
        epidemics = state.infection_rate_marker / 7
        # actions left smaller = less
        actions_left = state.actions_left / 4
        # player deck size 0 = game over
        player_deck = len(state.player_deck)
        # size infection discard pile
        infection_discard = len(state.infection_discard_pile)
        # research stations left
        research_stations_left = state.research_stations
        # active player
        active_player = state.active_player

        #######
        # combine single features
        ######

        counts_vector = np.array([
            active_player,
            phase,
            outbreaks,
            epidemics,
            actions_left,
            player_deck,
            infection_discard,
            research_stations_left,
        ])

        # city state [ *researchstation *viral_states ]
        city_feature_tuples = [
            tuple([int(city.has_research_station())]) +
            tuple(city.viral_state.values()) for city in state.cities.values()
        ]
        city_feature_vector = np.array(list(sum(zip(*city_feature_tuples),
                                                ())))
        # player characters + player locations
        player_feature_tuples = [(id, player.city)
                                 for id, player in state.players.items()]
        player_feature_vector = np.array(
            list(sum(zip(*player_feature_tuples), ())))

        # player cards, each hand zero-padded to 10 slots
        hands_feature_vector = np.ndarray.flatten(
            np.array([
                Pandemic.pad_with_zeros(10, np.array(list(player.cards)))
                for player in state.players.values()
            ]))
        # cures
        cures_vector = [int(s) for s in state.cures.values()]

        # list of cubes normalized less is bad
        cubes_stack_vector = np.array([c / 24 for c in state.cubes.values()])
        # list of latest outbreaks ?

        return np.concatenate([
            counts_vector,
            city_feature_vector,
            player_feature_vector,
            hands_feature_vector,
            cubes_stack_vector,
            cures_vector,
        ])

    @staticmethod
    def pad_with_zeros(n, array):
        """Return *array* right-padded with zeros to length *n* (1-D)."""
        shape = np.shape(array)
        padded_array = np.zeros(n)
        padded_array[:shape[0]] = array
        return padded_array
Пример #13
0
    def test_whole_round():
        """Smoke test: a full round of play completes without errors."""
        sim = Simulation()
        TestGeneral.walk_trough_round(sim)
Пример #14
0
def test_easy_mode_play_trough():
    """Scripted play-through of the deterministic 'easy mode' state.

    Replays a fixed winning action sequence and asserts the game is won
    after the final cure is discovered.
    """
    env = Simulation(
        characters={Character.QUARANTINE_SPECIALIST, Character.SCIENTIST},
        num_epidemic_cards=4,
        player_deck_shuffle_seed=5,
        infect_deck_shuffle_seed=10,
        epidemic_shuffle_seed=12,
    )

    env.state.internal_state = easy_state

    def ferry(player, *destinations):
        # Move the player along the route one city at a time.
        for destination in destinations:
            env.step(DriveFerry(player=player, destination=destination))

    def end_turn():
        # Four None-steps advance through the non-action phases.
        for _ in range(4):
            env.step(None)

    def cure(player, virus, cards):
        # Declare the cure, then pick the cards paying for it.
        env.step(DiscoverCure(target_virus=virus))
        env.get_possible_actions()
        for card in cards:
            env.step(ChooseCard(player=player, card=card))

    ferry(5, 48, 34, 48, 2)
    end_turn()

    cure(7, 3, [40, 25, 19, 17])
    ferry(7, 48, 29, 2)
    end_turn()

    cure(5, 1, [1, 38, 10, 43, 24])
    ferry(5, 48, 29, 2)
    end_turn()

    ferry(7, 48, 34, 48, 2)
    end_turn()

    ferry(5, 48, 34, 48, 2)
    end_turn()

    cure(7, 2, [42, 4, 44, 13])
    ferry(7, 48, 29, 2)
    end_turn()

    # Final cure: the original script queried possible actions around the
    # DiscoverCure step — replicate those calls exactly.
    env.get_possible_actions()
    env.step(DiscoverCure(target_virus=4))
    env.get_possible_actions()
    env.get_possible_actions()
    for card in [32, 3, 37, 8, 9]:
        env.step(ChooseCard(player=5, card=card))
    assert env.state.game_state == GameState.WIN
Пример #15
0
def switch_player_card(s, player, x, y):
    """Swap card *x* in *player*'s hand for card *y* from the draw deck.

    Card x leaves the hand and takes y's slot in the player deck, while y
    joins the hand — the deck/hand card totals stay balanced.
    """
    hand = s.players[player]
    hand.remove_card(x)
    hand.add_card(y)
    s.player_deck = replace_card(s.player_deck, y, x)


def swap_elements(list, x, y):
    """Swap the first occurrences of *x* and *y* in *list*, in place.

    Raises ValueError when either value is absent. Returns None.
    """
    i, j = list.index(x), list.index(y)
    list[i], list[j] = list[j], list[i]


# Build a seeded two-character game, then rig the dealt hands into the
# fixed "easy mode" layout.
_env = Simulation(
    characters={Character.QUARANTINE_SPECIALIST, Character.SCIENTIST},
    num_epidemic_cards=4,
    player_deck_shuffle_seed=5,
    infect_deck_shuffle_seed=10,
    epidemic_shuffle_seed=12,
)

_state = _env.state.internal_state
_state.active_player = 5

# (character, card given up, card taken) — applied in order; every swap
# also edits the player deck, so the sequence matters.
for _character, _card_out, _card_in in [
    (Character.QUARANTINE_SPECIALIST, 17, 1),
    (Character.QUARANTINE_SPECIALIST, 23, 10),
    (Character.QUARANTINE_SPECIALIST, 2, 24),
    (Character.SCIENTIST, 57, 17),
    (Character.SCIENTIST, 56, 40),
]:
    switch_player_card(_state, _character, _card_out, _card_in)