示例#1
0
    def observe(self, state, reward, done, actions):
        '''
        Plan the next move from the latest observation.

        The decision is delegated to find_next_action(state, actions): the
        first element of its result is stored in <self.next_action>, the
        second one is logged as the Monte Carlo score.
        <reward> and <done> are accepted but not used here.
        '''
        outcome = self.find_next_action(state, actions)
        self.next_action = outcome[0]
        game.out("Next action will be", self.next_action,
                 "; monte carlo gives a score of", outcome[1])
示例#2
0
    def act(self):
        '''
        Sample an action type with the softmax policy and return a concrete
        action of that type.

        The chosen type id is remembered in <self.last_action_id> and the
        returned action in <self.next_action>.
        '''
        # Bucket the currently available actions by their action-type id
        buckets = {
            type_id: []
            for type_id in range(len(self.action_space))
        }
        for candidate in self.actions:
            buckets[self.classify_action(candidate, to='id')].append(candidate)

        # Work on a copy of this state's row so the Q-table itself is untouched
        state_id = self.get_current_state_id()
        scores = self.Q[state_id, :].copy()

        # Re-sample until the drawn type has at least one available action;
        # the score of every rejected type is zeroed before the next draw.
        # NOTE(review): if no bucket holds any action this loop cannot
        # terminate - confirm callers always provide at least one action.
        while True:
            chosen_type = self.softmax_policy(scores)
            candidates = buckets[chosen_type]
            if candidates:
                break
            scores[chosen_type] = 0

        decision = self.choose(candidates, chosen_type)

        self.last_action_id = chosen_type
        self.next_action = decision
        game.out("Deciding to do", decision)
        return decision
示例#3
0
    def __init__(self, adversarial=True, state=None):
        '''
        Set up a new game.

        <state> is an existing state object to reuse; when it is None a new
        State is created, and <adversarial> is forwarded to it.
        '''
        # Identity test rather than `!= None`, so a supplied state is always
        # kept even if its class customises equality.
        self.state = state if state is not None else State(adversarial=adversarial)
        self.step = 0
        self.position = 0
        self.GAME_ENDED = False

        game.out("New game", verbose=-1)
    def act_old(self):
        '''
        Epsilon-greedy action selection.

        With probability <self.epsilon> a random available action is returned
        immediately. Otherwise action types are tried from the highest to the
        lowest Q-value of the current state until one has an available action;
        if none has, a 'do_nothing' action is used.
        '''
        # Exploration branch
        if random.random() < self.epsilon:
            return random.choice(self.actions)

        # Bucket the available actions by their action-type id
        buckets = {
            type_id: []
            for type_id in range(len(self.action_space))
        }
        for candidate in self.actions:
            buckets[self.classify_action(candidate, to='id')].append(candidate)

        # Action-type ids sorted by decreasing Q-value
        state_id = self.get_current_state_id()
        q_row = self.Q[state_id, :]
        ranking = np.flip(np.argsort(q_row), axis=0)

        position = 0
        next_action_type = ranking[position]
        candidates = buckets[next_action_type]
        while not candidates:
            position += 1
            if position < len(ranking):
                next_action_type = ranking[position]
                candidates = buckets[next_action_type]
            else:
                # Every type is exhausted: fall back to doing nothing
                candidates = [{'type': 'do_nothing', 'params': None}]

        decision = self.choose(candidates, next_action_type)
        self.next_action = decision
        game.out("Deciding to do", decision)
        return decision
示例#5
0
 def reserve_card(self, state, card):
     '''
     Reserve <card>: add it to the player's hand, then take one joker token
     from <state>.
     The hand must hold fewer than 3 cards beforehand (checked by an assert).
     '''
     hand_size = len(self.hand)
     assert hand_size < 3, ("Too many cards in hand for " + self.name)
     self.hand.append(card)
     game.out(self.name, "reserved the following card :", card)
     # Reserving grants a single joker token
     joker_grant = [(game.JOKER_COLOR, 1)]
     self.take_tokens(state, joker_grant)
示例#6
0
    def reset(self):
        '''
        Reset the underlying state and the step counter, clear the end-of-game
        flag, and return the result of get_player().
        '''
        self.state.reset()
        self.step = 0
        self.GAME_ENDED = False
        game.out("Environment reset")

        player = self.get_player()
        return player
示例#7
0
 def get_noble_from_id(self, state, noble_id):
     '''
     Remove the noble tile at index <noble_id> from <state.tiles>, add it to
     the player's nobles and grant 3 prestige points.
     '''
     acquired = state.tiles.pop(noble_id)
     self.nobles.append(acquired)
     self.prestige += 3
     game.out(self.name, "has gained a noble", acquired, "and now have",
              self.prestige, "prestige points")
示例#8
0
 def pay(self, state, amount, color):
     '''
     Move <amount> tokens of <color> from the player back to <state>.
     If the player owns fewer than <amount>, a warning is logged and only the
     tokens actually owned are paid.
     '''
     owned = self.tokens[color]
     if owned < amount:
         game.out("WARNING : the player", self.name, "tried to pay", amount,
                  " ", color, "tokens but had only", owned)
         game.out(self)
         # Cap the payment at what the player really has
         amount = owned
     self.tokens[color] -= amount
     state.tokens[color] += amount
示例#9
0
 def choose_noble(self, state, nobles_id):
     '''
     Pick which noble visits the player at the end of its turn and award it.

     If several nobles can visit, one is chosen at random; with a single
     candidate there is no choice.
     WARNING: <nobles_id> is a non-empty list of noble ids, i.e. their indexes
     in <state.tiles>, not the noble objects themselves.
     '''
     if len(nobles_id) == 1:
         noble_id = nobles_id[0]
     else:
         game.out(self.name, "can be visited by", len(nobles_id), "nobles")
         noble_id = random.choice(nobles_id)
     self.get_noble_from_id(state, noble_id)
示例#10
0
    def reset(self, seed=None):
        '''
        Reset the state: reseed the random generator, clear the turn and
        end-of-game bookkeeping, then deal a new deck.

        <seed> is passed as is to random.seed().
        '''
        random.seed(seed)

        # State
        # Cleared BEFORE dealing: _init_deck() logs the turn number and the
        # current player, which would otherwise still be the previous game's.
        self.turn = 0
        self.current_player = 0
        self.TARGET_REACHED = False
        self.GAME_ENDED = False
        self.winner = "(none)"

        self._init_deck()

        game.out("State reset")
示例#11
0
    def take_tokens(self, state, tokens):
        '''
        Move tokens from the table (<state>) to the player.
        <tokens> should be a list of tuples (<token_color>, <token_quantity>)
        '''
        # The log line is composed first, before any counter is modified
        prefix = self.name + " took "
        summary = ", ".join(
            str(amount) + " " + color + " token(s)" for color, amount in tokens)
        message = prefix + summary

        for color, amount in tokens:
            state.tokens[color] -= amount
            self.tokens[color] += amount
            # Keep the player's total token count in sync
            self.n_tokens += amount

        game.out(message)
示例#12
0
    def buy_card(self, state, card):
        '''
        Buy <card>: pay its discounted price to <state>, then collect the
        card's prestige and bonus.
        The purchase must be affordable (checked by an assert on can_buy).
        '''
        assert self.can_buy(card)

        # Payment: coloured tokens first, jokers for whatever is missing
        joker_color = game.JOKER_COLOR
        price = self.compute_discounted_price(card)
        for color, amount in price.items():
            owned = self.tokens[color]
            if owned < amount:
                # Spend every token of this colour, cover the rest with jokers
                self.pay(state, owned, color)
                self.pay(state, amount - owned, joker_color)
            else:
                self.pay(state, amount, color)

        # Reward: prestige points and one permanent bonus
        self.prestige += card.prestige
        self.bonuses[card.bonus] += 1
        game.out(self.name, "bought the following card :", card)
示例#13
0
 def player_has_reached_target(self, player):
     '''
     Announce that <player> has reached the prestige target and raise the
     TARGET_REACHED flag.
     '''
     game.out("\n\nCONGRATS !!!!")
     headline = (
         player.name, "has reached", player.prestige,
         "points. The game will end after the current turn is complete")
     game.out(*headline)
     game.out("!!!!!!!!!!!!!\n\n")
     self.TARGET_REACHED = True
示例#14
0
def _play_turn(board, agent, reward, game_ended):
    '''
    Let <agent> observe the board and play one action.
    Returns the (reward, game_ended) pair reported by board.take_action().
    '''
    state = board.get_visible_state(agent.identity)
    actions = board.get_possible_actions(agent.identity)

    agent.observe(state, reward, game_ended, actions)
    action = agent.act()

    _, reward, game_ended, _ = board.take_action(action, agent.identity)
    return reward, game_ended


def compare(a, b, n_games=100, max_step=100, display_results=True):
    '''
    Play <n_games> games between the agents <a> and <b> and tally the results.

    One Environment is created and reset before every game. A game lasts at
    most <max_step> rounds; in each round A plays, then B, then
    board.autoplay() is called. Games that reach the step limit without
    ending are not counted in any statistic.

    If <display_results> is true, a summary is printed.

    Returns (victories_a, victories_b, diff_a, diff_b, average_diff) where :
        - diff_a / diff_b list the winner's prestige margin for each game won
          by A / by B
        - average_diff is the mean of (A prestige - B prestige) over the
          finished games, or 0.0 when no game finished
    '''
    t_start = time()
    board = Environment()
    players = ['Player A', 'Player B']
    # Results
    victories_a = 0
    victories_b = 0
    diff_a = []
    diff_b = []
    diff = []

    for i in range(n_games):
        board.reset()

        # Start new game
        player_a = board.get_player(0)
        player_b = board.get_player(1)

        initial_state = board.state.visible()
        actions_a = board.get_possible_actions(player_a)
        actions_b = board.get_possible_actions(player_b)

        a.new_game(player_a, initial_state, actions_a)
        b.new_game(player_b, initial_state, actions_b)

        # Start playing !
        t = 0
        reward_a = 0
        reward_b = 0
        game_ended = False

        while not game_ended and t < max_step:
            # A's turn, then B's turn
            reward_a, game_ended = _play_turn(board, a, reward_a, game_ended)
            reward_b, game_ended = _play_turn(board, b, reward_b, game_ended)

            # Other players' turn
            board.autoplay()

            t += 1

        if game_ended:
            winner_id = board.winner('pos')
            score_a = a.identity.prestige
            score_b = b.identity.prestige
            diff.append(score_a - score_b)

            winner_label = None
            if winner_id == 0:
                winner_label = players[0]
                victories_a += 1
                diff_a.append(score_a - score_b)
            elif winner_id == 1:
                winner_label = players[1]
                victories_b += 1
                diff_b.append(score_b - score_a)
            if winner_label is not None:
                game.out(winner_label + " won in",
                         t,
                         "steps. A scored",
                         score_a,
                         "points, B scored",
                         score_b,
                         "points.",
                         verbose=1)

            # Periodic progress reports
            if i % 100 == 0 and i > 0:
                game.out("game",
                         i,
                         "out of",
                         n_games,
                         "score is",
                         victories_a,
                         "-",
                         victories_b,
                         verbose=0)
            if i % 1000 == 0 and i > 0:
                game.out(i,
                         "games played,", (n_games - i),
                         "to go. Elapsed time :", (time() - t_start),
                         "seconds. ETA :",
                         (n_games - i) * (time() - t_start) / i,
                         verbose=0)

    t_end = time()
    duration = t_end - t_start
    # Results :

    # A tie is reported as such instead of being credited to player B
    if victories_a > victories_b:
        wname = players[0]
    elif victories_b > victories_a:
        wname = players[1]
    else:
        wname = "(none)"

    # No finished game (or n_games == 0) must not crash the summary
    average_diff = sum(diff) / len(diff) if diff else 0.0
    percent_a = 100 * (victories_a / n_games) if n_games else 0.0
    percent_b = 100 * (victories_b / n_games) if n_games else 0.0

    if display_results:
        print(n_games, "iterations finished after", duration, "seconds.\n -")
        print("Winner :", wname)
        print(" -")
        print("A wins :", victories_a)
        print("B wins :", victories_b)
        print(" -")
        print("% A :", percent_a)
        print("% B :", percent_b)
        print("Average score dist between A and B:", average_diff)

    return victories_a, victories_b, diff_a, diff_b, average_diff
示例#15
0
 def act(self):
     '''
     Return the action currently stored in <self.next_action>, after logging it.
     '''
     planned = self.next_action
     game.out("Deciding to do", planned)
     return planned
示例#16
0
 def observe(self, state, reward, done, actions):
     '''
     Pick the next action uniformly at random among <actions> and store it in
     <self.next_action>. <state>, <reward> and <done> are not used.
     '''
     picked = random.choice(actions)
     self.next_action = picked
     game.out("Next action will be", self.next_action)
示例#17
0
 def print_deck(self):
     '''
     Log a text view of the table: turn header, noble tiles, the visible
     cards row by row, the token counts and the players' names.
     '''
     rule = "---------------------------------------------------------------------------"

     game.out("splendor - turn", self.turn, "- now playing : player",
              self.current_player)
     game.out(rule)
     game.out("".join(map(str, self.tiles)))
     for row in range(game.BOARD_X):
         game.out("\t".join(map(str, self.cards[row])))
     token_counts = [str(n) + " " + color for color, n in self.tokens.items()]
     game.out(" - ".join(token_counts))
     game.out(rule)
     game.out("Players :", " - ".join(p.name for p in self.players))
示例#18
0
    def _init_deck(self, nb_players=4):
        '''
        Deal a new table: shuffle the three card levels, reveal the last
        <game.BOARD_Y> cards of each, draw <nb_players> + 1 noble tiles,
        fetch the tokens and create the players.
        The first player is named "You"; the others get random names.
        '''
        # Retrieve development cards
        level_1, level_2, level_3 = self.get_cards()
        piles = (level_1, level_2, level_3)
        labels = ("Nb of level-1 cards :", "Nb of level-2 cards :",
                  "Nb of level-3 cards :")
        game.out("-- Initializing the game --")
        for label, pile in zip(labels, piles):
            game.out(label, len(pile))

        for pile in piles:
            random.shuffle(pile)

        # Split each level into its face-down deck and its revealed cards
        nb_reveal = game.BOARD_Y
        hidden_decks = [pile[:-nb_reveal] for pile in piles]
        visible_cards = [pile[-nb_reveal:] for pile in piles]

        # Tokens
        tokens = self.get_tokens()

        # Tiles
        tiles = random.sample(self.get_tiles(), nb_players + 1)

        # Declare attributes
        self.cards = visible_cards
        self.tiles = tiles
        self.tokens = tokens
        self.deck = hidden_decks
        self.players = [PlayerData(i) for i in range(game.NB_PLAYERS)]
        players_names = ["You"] + random.sample(game.PLAYER_NAMES,
                                                game.NB_PLAYERS - 1)
        for seat, name in zip(self.players, players_names):
            seat.rename(name)

        self.print_deck()

        game.out("-- Starting turn", (self.turn + 1), "-- ")
        game.out("Now playing :", self.get_current_player().name)
示例#19
0
    def step(self, action, player):
        '''
        Main function. Given an action <action> and a player <player>, it updates the state
        accordingly, then handles noble visits, the token limit, the victory check and the
        hand-over to the next player. Nothing is done if the game has already ended.

        Input :
        <action> is a dict with two keys :
            - type : a string among <game.POSSIBLE_ACTIONS>
            - params : the parameters of the action. Its format depends on the type.
                - take_3 : list/iterable of three color names : [<color_1>, <color_2>, <color_3>]
                - take_2 : string, color name
                - reserve : origin and coordinate of the card
                    - ['from_table', (i, j)]
                    - ['from_deck', i]
                - purchase : origin and coordinate of the card
                    - ['from_table', (i, j)]
                    - ['from_hand', i]
        <player> is the player object performing the action.

        Output : always None; every result is a side effect on the state and on <player>.
        '''
        # A finished game ignores any further action
        if self.GAME_ENDED:
            return

        # Relies on game.POSSIBLE_ACTIONS holding exactly these five types, in this order
        [TAKE_3, TAKE_2, RESERVE, PURCHASE, DO_NOTHING] = game.POSSIBLE_ACTIONS
        action_type = action['type']
        params = action['params']

        # APPLY THE ACTION
        # NOTE(review): an unknown action type or card origin is silently ignored, and the
        # turn still passes to the next player.
        if action_type == TAKE_3:
            # One token of each listed color
            tokens = [(color, 1) for color in params]
            player.take_tokens(self, tokens)

        elif action_type == TAKE_2:
            # Two tokens of a single color
            tokens = [(params, 2)]
            player.take_tokens(self, tokens)

        elif action_type == RESERVE:
            # <params> is rebound to the coordinate part of the original pair
            [origin, params] = params
            if origin == 'from_table':
                i, j = params
                card = self.get_card_from_table(i, j)
                player.reserve_card(self, card)

            elif origin == 'from_deck':
                i = params
                card = self.get_card_from_deck(i)
                player.reserve_card(self, card)

        elif action_type == PURCHASE:
            # <params> is rebound to the coordinate part of the original pair
            [origin, params] = params
            if origin == 'from_table':
                i, j = params
                card = self.get_card_from_table(i, j)
                player.buy_card(self, card)
            elif origin == 'from_hand':
                i = params
                card = player.pop_card_from_hand(i)
                player.buy_card(self, card)
        elif action_type == DO_NOTHING:
            # NOTE(review): the name logged is the current player's, not <player>'s -
            # confirm both always refer to the same player here.
            game.out(self.get_current_player().name,
                     "doesn't do anything this turn")

        # CHECK WHOSE NOBLES ARE VISITING
        # Collect the indexes (in self.tiles) of every noble that can visit <player>
        visiting_nobles = []
        for noble_id in range(len(self.tiles)):
            noble = self.tiles[noble_id]
            if noble.can_visit(player):
                visiting_nobles.append(noble_id)
        if len(visiting_nobles) > 0:
            player.choose_noble(self, visiting_nobles)

        # CHECK IF PLAYER HAS THE RIGHT AMOUNT OF TOKENS
        player.remove_extra_tokens(self)

        # CHECK IF PLAYER HAS WON
        # The game ends immediately only in adversarial mode or when player 0 is playing;
        # otherwise a winning player changes nothing here and play simply continues.
        if player.has_won(self):
            if self.adversarial or self.current_player == 0:
                self.player_has_reached_target(player)
                self.GAME_ENDED = True
                self.TARGET_REACHED = True
                game.out("-- END OF THE GAME --")
                game.out(self.get_results())
                return

        # HAND OVER TO THE NEXT PLAYER
        self.current_player += 1
        if self.current_player == game.NB_PLAYERS:
            # Every player has played : the turn is over
            game.out("End of turn", self.turn, "\n")
            if game.INCREMENTAL:
                # Interactive mode : ask for confirmation before the next turn
                s = input("Continue ?")
                if s in ["quit", "cancel", "no", "No", "N", "n"]:
                    # NOTE(review): returning here leaves self.current_player equal to
                    # game.NB_PLAYERS and does not set GAME_ENDED.
                    return
                else:
                    game.out("Continuing...\n\n")
            if self.TARGET_REACHED:
                # The target was reached earlier : the game stops once the turn is complete
                self.GAME_ENDED = True
                game.out("-- END OF THE GAME --")
                game.out(self.get_results())
                return

            self.turn += 1
            self.current_player = 0
            game.out("-- Starting turn", (self.turn + 1), "-- ")
        game.out("Now playing :", self.get_current_player().name, verbose=3)