Exemplo n.º 1
0
    def getValidMoves(self, board, player):
        """
        Input:
            board: current board
            player: current player

        Returns:
            validMoves: a binary vector of length self.getActionSize(), 1 for
                        moves that are valid from the current board and player,
                        0 for invalid moves
        """
        # The first 14 board entries encode the current player's hand counts.
        hand = np.array([board[:14]])

        # Entries 42:56 hold the card counts of the previous play; rebuild it
        # as a flat list of card indices, highest counts first.
        counts = board[42:56]
        cards = []
        for idx in np.argsort(-counts):
            cards.extend([idx] * int(counts[idx]))
        last_move = Play(cards) if cards else None

        state = GameState(hands=hand, last_move=last_move)
        legal_indices = [
            self.encoded_actions[tuple(move)]
            for move in state.legal_actions()
        ]
        mask = np.zeros(self.getActionSize())
        mask[legal_indices] = 1
        return mask
Exemplo n.º 2
0
    def simulate(self):
        """Random rollout from self.state until the game produces a winner.

        Returns 1 when the winner is on self.player's side (seat 0 is the
        landlord; seats 1 and 2 are the peasant team), otherwise 0.
        """
        state = self.state
        # Play uniformly-random legal moves until get_winner() is non-negative.
        while state.get_winner() < 0:
            moves = state.legal_actions()
            chosen = moves[np.random.randint(len(moves))]
            state = GameState(*state.simulate(Play(chosen)))

        winner = state.get_winner()
        if self.player == 0:
            # Landlord perspective: only a player-0 win counts.
            return int(winner == 0)
        # Peasant perspective: a win by either teammate counts.
        return int(winner == 1 or winner == 2)
Exemplo n.º 3
0
def start_game(players, info=False, save_data=False):
    """Play one full game of doudizhu between the given players.

    Args:
        players: sequence of three player objects indexed by seat
            (seat 0 is the landlord). Each is either a Supervised model
            or an agent exposing current_state(...) / play().
        info: if True, print per-turn state and moves to stdout.
        save_data: if True, append (state, action-index) pairs to the
            module-level `inputs` and `target` lists (must exist at
            module scope).

    Returns:
        The winning player index from game.get_winner().
    """
    game = GameState()
    while game.get_winner() == -1:
        player = game.turn
        hands = game.hands[player]
        last_move = game.last_move
        last_deal = [] if last_move is None else last_move.cards
        possible_moves = game.legal_actions()
        played_cards = game.played_cards
        # NOTE(review): this compares the last *Play object* (or None) to 0,
        # which looks like it should instead test which seat made the last
        # move; preserved as-is pending confirmation against the Play class.
        is_last_deal_landlord = int(game.last_move == 0)
        is_landlord = int(game.turn == 0)

        possible_move_indices = [inv_map[tuple(i)] for i in possible_moves]

        if save_data:
            inputs.append(game.get_player_state(player))

        if isinstance(players[player], Supervised):
            # The supervised model returns an encoded action index; decode it
            # back into a mutable card list.
            action = G.decoded_actions[int(players[player].play(
                torch.FloatTensor(game.get_player_state(player)),
                possible_move_indices))]
            action = list(action)
        else:
            players[player].current_state(hands, last_deal, possible_moves,
                                          played_cards, is_landlord,
                                          is_last_deal_landlord)
            action = players[player].play()

        if game.turn == 0 and info:
            # Debug aid: compare the supervised landlord's choice against
            # whatever policy sits in seat 1.
            print("supervised:", action)
            players[1].current_state(hands, last_deal, possible_moves,
                                     played_cards, is_landlord,
                                     is_last_deal_landlord)
            action2 = players[1].play()
            print("correct:", action2)

        if save_data:
            target.append(inv_map[tuple(action)])

        play = Play(action)
        if info:
            print(hands)
            print(last_deal)
            print(f'player {game.turn}:', action)
            print()
        game.move(play)
    if info:
        print(f"Player {game.get_winner()} wins!")
    return game.get_winner()
Exemplo n.º 4
0
    def expand(self):
        """Expand this node by one untried action; return the new child node."""
        action = self._untried_actions.pop()
        # Advance the game state by the chosen play and wrap it in a child.
        next_state = GameState(*self.state.simulate(Play(action)))
        child = MonteCarloTreeSearchNode(
            next_state,
            self.player,
            parent=self,
            parent_action=Play(action),
        )
        self.children.append(child)
        return child
Exemplo n.º 5
0
from doudizhu import GameState
import numpy as np
import pickle

game = GameState()
# Give player 0 the maximal hand — four of every rank but only two in the
# last slot — so legal_actions() enumerates every possible action once.
game.hands[0] = np.array([4] * 14)
game.hands[0, -1] = 2
possible_actions = game.legal_actions()
print(len(possible_actions))

# Map each action (as a hashable tuple) to its enumeration index and
# persist the lookup table for other scripts to load.
action_encoder = {
    tuple(action): index for index, action in enumerate(possible_actions)
}
with open('action_encoder.pt', 'wb') as f:
    pickle.dump(action_encoder, f)
Exemplo n.º 6
0
def main():
    """Train a landlord DQN against a peasant DQN via self-play.

    Runs n_games of doudizhu; after every move the acting agent stores its
    transition and performs one learning step. Terminal rewards are +1 for
    the winning side and -1 for the loser. Saves a win-rate plot and prints
    the final win rates.
    """
    n_games = 1000
    gamma = 0.01
    epsilon = 0.8
    lr = 0.001
    input_dims = 32
    batch_size = 64
    n_actions = len(encoded_actions)

    LandlordAI = Agent(gamma, epsilon, lr, [input_dims], batch_size, n_actions)
    PeasantAI = Agent(gamma, epsilon, lr, [input_dims], batch_size, n_actions)

    LandlordAI_wins = 0
    PeasantAI_wins = 0

    LandlordAI_winRates = []
    PeasantAI_winRates = []

    for i in range(n_games):
        if i % 50 == 0:
            print("game ", str(i))
        game = GameState()
        while game.get_winner() == -1:
            turn = game.turn
            observation = game.get_player_state(turn)
            possible_moves = game.legal_actions()
            possible_moves_indices = np.array(
                [encoded_actions[tuple(a)] for a in possible_moves])

            # Both seats run the identical DQN step; only the acting agent
            # and the reward sign differ (landlord wins iff winner == 0).
            is_landlord = turn == 0
            agent = LandlordAI if is_landlord else PeasantAI

            action = agent.choose_action(observation, possible_moves_indices)
            game.move(Play(decoded_actions[action]))
            observation_ = game.get_player_state(turn)

            winner = game.get_winner()
            if winner != -1:
                done = True
                landlord_won = winner == 0
                if is_landlord:
                    reward = 1 if landlord_won else -1
                    if landlord_won:
                        LandlordAI_wins += 1
                else:
                    reward = -1 if landlord_won else 1
                    if not landlord_won:
                        PeasantAI_wins += 1
            else:
                reward = 0
                done = False

            agent.store_transition(observation, action, reward,
                                   observation_, done)
            agent.learn()

        LandlordAI_winRates.append(LandlordAI_wins / (i + 1))
        PeasantAI_winRates.append(PeasantAI_wins / (i + 1))

    plt.plot(LandlordAI_winRates)
    plt.plot(PeasantAI_winRates)
    plt.legend(['Landlord (DQN)', 'Peasant (DQN)'])
    plt.title('Win Rate vs. Games Played')
    plt.savefig('Win Rate vs. Games Played (DQN Landlord, DQN Peasant).png')

    print("Landlord Final Win Rate: ", str(LandlordAI_winRates[-1]))
    print("Peasant Final Win Rate: ", str(PeasantAI_winRates[-1]))
Exemplo n.º 7
0
def main():
    """Interactive doudizhu: MCTS landlord (seat 0) versus human input.

    Loops until the game has a winner, printing each player's hand, the
    play to beat, and the indexed legal actions; human seats choose a move
    by index or enter PASS/P to pass.
    """
    game = GameState()
    # Seed the search tree with a copy of the starting hands (`+0` copies
    # the numpy array so the tree never aliases the live game state).
    state = GameState(hands=game.hands + 0)
    landlordAI = MonteCarloTreeSearchNode(state, 0)

    while game.get_winner() == -1:
        print(f'PLAYER {game.turn}\'s CARDS:')
        hand_str = ''
        for i, n in enumerate(game.hands[game.turn]):
            hand_str += ' '.join([CARD_STR[i]] * int(n)) + ' '
        print(hand_str)

        print('Your opponents hand sizes: ', end='')
        for i in range(3):
            if i != game.turn:
                print(sum(game.hands[i]), end=' ')
        print()

        if game.last_move is not None:
            print('The play to beat: ', game.last_move)
        else:
            print('There is no play to beat')

        print('Legal Actions:')
        possible_moves = game.legal_actions()
        # The last legal action is PASS; it is entered by name, not index.
        for i, action in enumerate(possible_moves[:-1]):
            print(f'{i}: {[CARD_STR[c] for c in action]}')

        while True:
            if game.turn == 0:
                # Detach the chosen child so the old tree can be collected.
                landlordAI = landlordAI.best_action()
                landlordAI.parent = None
                print(f'Landlord played a {landlordAI.parent_action.type}!')
                print(landlordAI.parent_action)
                input('Press anything to continue')
                game.move(landlordAI.parent_action)
                break
            else:
                move = input('Please enter your indexed move or enter PASS: ')
                if move == 'PASS' or move == 'P':
                    move = -1  # PASS sits at the end of possible_moves
                elif move.isnumeric() and int(move) < len(possible_moves):
                    move = int(move)
                else:
                    print('Invalid Move!')
                    continue

                move = possible_moves[move]
                play = Play(move)
                print(f'You played a {play.type}!')
                input('Press anything to continue')
                game.move(play)
                try:
                    # NOTE(review): at this point `move` is a card list, not
                    # an index, so this lookup likely always raises and the
                    # tree is rebuilt below — confirm children's key type.
                    landlordAI = landlordAI.children[move]
                    landlordAI.parent = None
                except Exception:
                    # Fall back to a fresh search tree from the live state.
                    state = GameState(hands=game.hands + 0,
                                      last_move=game.last_move,
                                      turn=game.turn,
                                      passes=game.passes)
                    landlordAI = MonteCarloTreeSearchNode(state, 0)
                break
        print('\n\n')
    print(f'Player {game.get_winner()} wins!')