def main(agent):
    """Interactive game loop: `agent` plays seat 0, humans play seats 1 and 2.

    Loops until Game.get_winner() reports a winner (!= -1), printing each
    player's hand, opponents' hand sizes, and the indexed legal actions.
    """
    game = Game()
    while game.get_winner() == -1:
        print(f"PLAYER {game.turn}'s CARDS:")
        print(game.hands[game.turn])
        print("Your opponents hand sizes: ", end="")
        for i in range(3):
            if i != game.turn:
                print(sum(game.hands[i]), end=" ")
        print()
        if game.last_move is not None:
            print("The play to beat: ", game.last_move.cards)
        else:
            print("There is no play to beat")
        print("Legal Actions:")
        possible_moves = game.legal_actions()
        # The last legal action is PASS; it is entered as "PASS"/"P",
        # not by index, so it is not listed here.
        for i, action in enumerate(possible_moves[:-1]):
            print(f'{i}: {action}')
        while True:
            if game.turn == 0:
                # --- agent's turn ---
                player = game.turn
                hands = game.hands[player]
                last_move = game.last_move
                last_deal = [] if last_move is None else last_move.cards
                possible_moves = game.legal_actions()
                agent.current_state(hands, last_deal, possible_moves)
                action = agent.play()
                print("Agent played", action)
                input("Press anything to continue")
                play = Play(action)
                game.move(play)
                break
            else:
                # --- human's turn ---
                # NOTE(review): this span was redacted in the original source
                # ("PASS: "******"PASS") and has been reconstructed from the
                # sibling mains; the original may have normalised input case.
                move = input("Please enter your indexed move or enter PASS: ")
                if move == "PASS" or move == "P":
                    move = -1  # PASS is kept as the last entry of possible_moves
                elif move.isnumeric() and int(move) < len(possible_moves):
                    move = int(move)
                else:
                    print('Invalid Move!')
                    continue
                move = possible_moves[move]
                play = Play(move)
                print(f"You played a {play.type}!")
                input("Press anything to continue")
                game.move(play)
                break
        print("\n\n")
    print(f"Player {game.get_winner()} wins!")
def expand(self):
    """Pop one untried action, simulate it, and attach the resulting child.

    Returns the newly created child node.
    """
    untried = self._untried_actions.pop()
    # simulate() returns the constructor parameters of the successor state.
    successor = Game(*self.state.simulate(Play(untried)))
    child = MonteCarloTreeSearchNode(
        successor,
        self.player,
        parent=self,
        parent_action=Play(untried),
    )
    self.children.append(child)
    return child
def play(self):
    """Greedy move selection.

    Prefers the action leaving the fewest distinct card ranks in hand,
    breaking ties by the average rank of the cards that would remain.
    Temporarily mutates self.hands while evaluating each action and
    restores it before moving on, so the hand is unchanged on return.
    """
    # print("\n")
    # print(self.hands)
    # print("last play", self.last_deal)
    best = np.count_nonzero(self.hands)  # distinct ranks currently held
    # Weighted sum of the hand (rank index * copies held).
    # NOTE(review): `sum` shadows the builtin for the rest of this method.
    sum = np.sum([i*j for i,j in enumerate(self.hands)])
    hand_size= np.sum(self.hands)  # total number of cards held
    best_arg = 0
    tie_break = 0
    if self.play_info.type == 'PASS':
        # Free to lead: play every copy of the lowest rank held with
        # fewer than 4 copies (quads are skipped).
        for i, num in enumerate(self.hands):
            if num>0 and num<4:
                #print([i for j in range(num)])
                return [i for j in range(num)]
    for i, action in enumerate(self.possible_actions):
        hand_sum = 0
        # Temporarily remove the action's cards from the hand.
        for card in action:
            hand_sum += card
            self.hands[card] -= 1
        num_cards = np.count_nonzero(self.hands)
        if num_cards == 0:
            # This action empties the hand: instant win, play it.
            for card in action:
                self.hands[card] += 1
            return action
        if num_cards < best:
            best = num_cards
            best_arg = i
            # Average rank of the cards that would remain after playing.
            tie_break = (sum - hand_sum)/(hand_size - len(action))
            # NOTE(review): the comment says bombs are punished, but the
            # condition penalises NON-bombs — confirm whether this was
            # meant to be == "bomb".
            if Play(action).type != "bomb": #punishment for using a bomb
                tie_break -= 3
        elif self.temp and num_cards == best:
            # Tied on distinct ranks: keep whichever action leaves the
            # higher-ranked remainder.
            new_tiebreak = (sum - hand_sum)/(hand_size - len(action))
            if Play(action).type != "bomb": #punishment for using a bomb
                new_tiebreak -= 3
            if new_tiebreak > tie_break and Play(action).type != "PASS":
                #print("comparing", self.possible_actions[best_arg], self.possible_actions[i])
                best_arg = i
                tie_break = new_tiebreak
        # Restore the hand before evaluating the next action.
        for card in action:
            self.hands[card] += 1
    # print(self.possible_actions[best_arg])
    # # print("tiebreak is ", tie_break)
    #print("play made", self.possible_actions[best_arg])
    return(self.possible_actions[best_arg])
def getValidMoves(self, board, player):
    """
    Input:
        board: current board
        player: current player

    Returns:
        validMoves: a binary vector of length self.getActionSize(), 1 for
                    moves that are valid from the current board and
                    player, 0 for invalid moves
    """
    # board[:14] holds the acting player's per-rank card counts.
    # NOTE(review): `player` is unused — presumably the board slice
    # already encodes the acting player's hand; confirm against caller.
    hand = np.array([board[:14]])
    # board[42:56] holds the per-rank counts of the play to beat; expand
    # it into a flat list of rank indices (one entry per copy), ranks
    # with the highest counts first.
    last_move = []
    for i in np.argsort(-board[42:56]):
        for _ in range(int(board[42:56][i])):
            last_move.append(i)
    # An empty list means there is no play to beat.
    last_move = Play(last_move) if last_move else None
    game = GameState(hands=hand, last_move=last_move)
    # Map each legal action (tuple of rank indices) to its action id.
    valid_actions = [
        self.encoded_actions[tuple(action)]
        for action in game.legal_actions()
    ]
    one_hot = np.zeros(self.getActionSize())
    one_hot[valid_actions] = 1
    return one_hot
def current_state2(self, info, possible_moves):
    """Decode a flat observation vector into the agent's cached state.

    info[:14]   — the agent's per-rank card counts (stored as self.hands).
    info[14:28] — per-rank counts of the last deal; expanded into
                  self.last_deal as a flat list of rank indices (one
                  entry per copy).
    """
    self.hands = info[:14]
    last_deal_counts = info[14:28]
    self.last_deal = []
    for card, count in enumerate(last_deal_counts):
        for _ in range(count):
            self.last_deal.append(card)
    # BUG FIX: Play() takes the expanded card-index list (as in
    # current_state and every other Play(action) call site), not the
    # 14-long count vector that was passed here before.
    self.play_info = Play(self.last_deal)
    self.possible_actions = possible_moves
def simulate(self):
    """Uniform-random rollout from this node's state.

    Returns 1 when the rollout winner is on this node's team (seat 0
    alone, or seats 1/2 together), else 0.
    """
    state = self.state
    # Play random legal moves until the game produces a winner.
    while state.get_winner() < 0:
        legal = state.legal_actions()
        choice = legal[np.random.randint(len(legal))]
        state = Game(*state.simulate(Play(choice)))
    winner = state.get_winner()
    if self.player == 0:
        return int(winner == 0)
    return int(winner in (1, 2))
def start_game(players, info=False, save_data=False):
    """Run one full game between `players` (one per seat); return the winner.

    save_data: append (state, action-index) pairs to the module-level
    `inputs` / `target` lists for supervised training.
    info: print per-move debug output, including a comparison between the
    supervised player's move and players[1]'s move for seat 0.
    """
    game = GameState()
    while game.get_winner() == -1:
        player = game.turn
        hands = game.hands[player]
        last_move = game.last_move
        last_deal = [] if last_move is None else last_move.cards
        possible_moves = game.legal_actions()
        played_cards = game.played_cards
        # NOTE(review): `game.last_move == 0` compares a Play object (or
        # None) with 0 and is therefore always 0; elsewhere
        # (generate_transitions) this flag is `game.last_move_player == 0`
        # — confirm GameState exposes last_move_player before changing.
        is_last_deal_landlord = int(game.last_move == 0)
        is_landlord = int(game.turn == 0)
        possible_move_indices = [inv_map[tuple(i)] for i in possible_moves]
        if save_data:
            inputs.append(game.get_player_state(player))
        if isinstance(players[player], Supervised):
            # Supervised players take the raw encoded state tensor and the
            # indices of the legal actions.
            action = G.decoded_actions[int(players[player].play(
                torch.FloatTensor(game.get_player_state(player)),
                possible_move_indices))]
            action = list(action)
        else:
            players[player].current_state(hands, last_deal, possible_moves,
                                          played_cards, is_landlord,
                                          is_last_deal_landlord)
            action = players[player].play()
        #print(action)
        if game.turn == 0 and info:
            # Debug: compare seat 0's move with what players[1] would play.
            print("supervised:", action)
            players[1].current_state(hands, last_deal, possible_moves,
                                     played_cards, is_landlord,
                                     is_last_deal_landlord)
            action2 = players[1].play()
            print("correct:", action2)
        #print(game.turn)
        if save_data:
            target.append(inv_map[tuple(action)])
        # print(action)
        # print(G.decoded_actions[inv_map[tuple(action)]])
        play = Play(action)
        if info:
            print(hands)
            print(last_deal)
            print(f'player {game.turn}:', action)
            print()
        game.move(play)
    if info:
        print(f"Player {game.get_winner()} wins!")
    return game.get_winner()
def generate_transitions(agent):
    """Self-play `num_of_games` games with `agent` playing every seat.

    Returns the list of GameTransition records from the last game, with
    terminal rewards back-filled once the winner is known.
    """
    win_reward = 1
    no_reward = 0
    lose_reward = -1
    num_of_games = 1
    for _ in range(num_of_games):
        game = Game()
        game_transitions = []
        while game.get_winner() == -1:
            player = game.turn
            hands = game.hands[player]
            last_move = game.last_move
            last_deal = [] if last_move is None else last_move.cards
            possible_moves = game.legal_actions()
            played_cards = game.played_cards
            # last_move_player = game.last_move_player
            # last_move_player = [int(last_move_player == i) for i in range(3)]
            # Whether the play to beat was made by the landlord (seat 0).
            is_last_deal_landlord = int(game.last_move_player == 0)
            is_landlord = int(game.turn == 0)
            agent.current_state(hands, last_deal, possible_moves,
                                played_cards, is_landlord,
                                is_last_deal_landlord)
            current_state, action, score = agent.deal()
            play = Play(action)
            game.move(play)
            if game.get_winner() == -1:
                # Game still running: record the move with no reward yet.
                gt = GameTransition(current_state, score, no_reward, None)
            else:
                # Game over: the seat that just moved emptied its hand and
                # wins (its transition is built below with win_reward).
                # The previous two transitions belong to the other two
                # seats, so their rewards are patched retroactively: the
                # winner's teammate also wins, opponents lose.
                # NOTE(review): assumes at least two transitions already
                # exist when the game ends (i.e. the game lasted three or
                # more moves) — confirm this always holds.
                # if landlord wins
                if game.get_winner() == 0:
                    game_transitions[-1].reward = lose_reward
                    game_transitions[-2].reward = lose_reward
                # if farmer 1 wins
                elif game.get_winner() == 1:
                    game_transitions[-1].reward = lose_reward
                    game_transitions[-2].reward = win_reward
                # if farmer 2 wins
                else:
                    game_transitions[-1].reward = win_reward
                    game_transitions[-2].reward = lose_reward
                gt = GameTransition(current_state, score, win_reward, None)
            game_transitions.append(gt)
    # print(f"Player {game.get_winner()} wins!")
    return game_transitions
def vs_mcts(agent, info=False):
    """Pit `agent` (landlord, seat 0) against two MCTS peasants.

    Each peasant owns its own search tree; after every move both trees are
    advanced (via landlordAI_move / best_action) so they stay in sync with
    the real game. Returns the winning seat.
    """
    game = Game()
    # `+ 0` copies the hands array so each searcher owns independent state.
    state1 = Game(hands=game.hands + 0)
    state2 = Game(hands=game.hands + 0)
    mcts_agent1 = MonteCarloTreeSearchNode(state1, 1)
    mcts_agent2 = MonteCarloTreeSearchNode(state2, 2)
    mcts = [mcts_agent1, mcts_agent2]
    if info:
        print('Game Start')
    while game.get_winner() == -1:
        if game.turn == 0:
            # --- landlord (the agent under test) moves ---
            player = game.turn
            hands = game.hands[player]
            last_move = game.last_move
            last_deal = [] if last_move is None else last_move.cards
            possible_moves = game.legal_actions()
            played_cards = game.played_cards
            # BUG FIX: previously `int(game.last_move == 0)`, which compares
            # a Play object (or None) with 0 and is always 0. The intended
            # flag — whether the landlord made the play to beat — uses
            # last_move_player, as in generate_transitions().
            is_last_deal_landlord = int(game.last_move_player == 0)
            is_landlord = int(game.turn == 0)
            agent.current_state(hands, last_deal, possible_moves,
                                played_cards, is_landlord,
                                is_last_deal_landlord)
            action = agent.play()
            play = Play(action)
            if info:
                print('player 0:', ' '.join([CARD_STR[a] for a in action]))
            game.move(play)
            # Advance both searchers' trees past the landlord's move.
            mcts[0] = landlordAI_move(mcts[0], game, action)
            mcts[1] = landlordAI_move(mcts[1], game, action)
        else:
            # --- one of the MCTS peasants moves ---
            mcts_id = game.turn - 1
            mcts_agent = mcts[mcts_id]
            mcts_agent = mcts_agent.best_action()
            # Detach the chosen child so it becomes the new search root.
            mcts_agent.parent = None
            move = mcts_agent.parent_action
            mcts[mcts_id] = mcts_agent
            game.move(move)
            # Keep the other searcher's tree in sync with this move.
            mcts[1 - mcts_id] = landlordAI_move(mcts[1 - mcts_id], game, move)
            #mcts[1] = landlordAI_move(mcts[1], game, move)
            if info:
                print(f'player {mcts_id + 1}:', move)
    if info:
        print(f"Player {game.get_winner()} wins!")
    return game.get_winner()
def start_game(players, info=False):
    """Play one full game, with players[seat] choosing each seat's moves.

    Returns the winning seat index.
    """
    game = Game()
    while game.get_winner() == -1:
        player = game.turn
        hands = game.hands[player]
        last_move = game.last_move
        last_deal = [] if last_move is None else last_move.cards
        possible_moves = game.legal_actions()
        played_cards = game.played_cards
        # BUG FIX: previously `int(game.last_move == 0)`, which compares a
        # Play object (or None) with 0 and is therefore always 0. The
        # intended flag — whether the landlord made the play to beat —
        # uses last_move_player, as in generate_transitions().
        is_last_deal_landlord = int(game.last_move_player == 0)
        is_landlord = int(game.turn == 0)
        players[player].current_state(hands, last_deal, possible_moves,
                                      played_cards, is_landlord,
                                      is_last_deal_landlord)
        action = players[player].play()
        #action = players[player].play(game.legal_actions(), player, game.hands[player], last_deal)
        play = Play(action)
        if info:
            print(f'player {game.turn}:', action)
        game.move(play)
    if info:
        print(f"Player {game.get_winner()} wins!")
    return game.get_winner()
def current_state(self, hands, last_deal, possible_moves, played_cards, is_landlord, is_last_deal_landlord):
    """Cache the observable game state for the next call to play().

    Note: is_landlord and is_last_deal_landlord are accepted for interface
    compatibility but not stored.
    """
    self.hands = hands
    self.last_deal = last_deal
    # Wrap the last deal so its play type (pair, bomb, PASS, ...) is known.
    self.play_info = Play(last_deal)
    self.played_cards = played_cards
    self.possible_actions = possible_moves
def main():
    """Train two DQN agents (landlord vs. peasants) by self-play and plot
    their running win rates over n_games games."""
    n_games = 1000
    gamma = 0.01       # discount factor
    epsilon = 0.8      # exploration rate
    lr = 0.001         # learning rate
    input_dims = 32    # size of the flattened player-state observation
    batch_size = 64
    n_actions = len(encoded_actions)
    LandlordAI = Agent(gamma, epsilon, lr, [input_dims], batch_size, n_actions)
    # A single agent plays both peasant seats (turns 1 and 2).
    PeasantAI = Agent(gamma, epsilon, lr, [input_dims], batch_size, n_actions)
    LandlordAI_wins = 0
    PeasantAI_wins = 0
    LandlordAI_winRates = []
    PeasantAI_winRates = []
    for i in range(n_games):
        if i % 50 == 0:
            print("game ", str(i))
        game = GameState()
        while game.get_winner() == -1:
            turn = game.turn
            observation = game.get_player_state(turn)
            possible_moves = game.legal_actions()
            possible_moves_indices = np.array(
                [encoded_actions[tuple(a)] for a in possible_moves])
            if turn == 0:
                # --- landlord's move ---
                action = LandlordAI.choose_action(observation, possible_moves_indices)
                game.move(Play(decoded_actions[action]))
                # Successor observation from the same seat's viewpoint.
                observation_ = game.get_player_state(turn)
                if game.get_winner() != -1:
                    if game.get_winner() == 0:
                        reward = 1
                        LandlordAI_wins += 1
                    else:
                        reward = -1
                    done = True
                else:
                    reward = 0
                    done = False
                # NOTE(review): only the agent making the game-ending move
                # stores a terminal reward; the other agent's last stored
                # transition keeps reward 0 — confirm this is intended.
                LandlordAI.store_transition(observation, action, reward,
                                            observation_, done)
                LandlordAI.learn()
            else:
                # --- peasant's move (seats 1 and 2 share PeasantAI) ---
                action = PeasantAI.choose_action(observation, possible_moves_indices)
                game.move(Play(decoded_actions[action]))
                observation_ = game.get_player_state(turn)
                if game.get_winner() != -1:
                    if game.get_winner() == 0:
                        reward = -1
                    else:
                        reward = 1
                        PeasantAI_wins += 1
                    done = True
                else:
                    reward = 0
                    done = False
                PeasantAI.store_transition(observation, action, reward,
                                           observation_, done)
                PeasantAI.learn()
        # Running win rates after each completed game.
        LandlordAI_winRates.append(LandlordAI_wins / (i + 1))
        PeasantAI_winRates.append(PeasantAI_wins / (i + 1))
    plt.plot(LandlordAI_winRates)
    plt.plot(PeasantAI_winRates)
    plt.legend(['Landlord (DQN)', 'Peasant (DQN)'])
    plt.title('Win Rate vs. Games Played')
    plt.savefig('Win Rate vs. Games Played (DQN Landlord, DQN Peasant).png')
    print("Landlord Final Win Rate: ", str(LandlordAI_winRates[-1]))
    print("Peasant Final Win Rate: ", str(PeasantAI_winRates[-1]))
def main():
    """Interactive game: MCTS agent plays the landlord (seat 0), humans
    play seats 1 and 2. The agent's search tree is reused across turns
    when the human's move matches an explored child."""
    game = GameState()
    state = GameState(hands=game.hands+0)  # +0 copies the hands array
    landlordAI = MonteCarloTreeSearchNode(state, 0)
    while game.get_winner() == -1:
        print(f'PLAYER {game.turn}\'s CARDS:')
        hand_str = ''
        for i, n in enumerate(game.hands[game.turn]):
            hand_str += ' '.join([CARD_STR[i]] * int(n)) + ' '
        print(hand_str)
        print('Your opponents hand sizes: ', end='')
        for i in range(3):
            if i != game.turn:
                print(sum(game.hands[i]), end=' ')
        print()
        if game.last_move is not None:
            print('The play to beat: ', game.last_move)
        else:
            print('There is no play to beat')
        print('Legal Actions:')
        possible_moves = game.legal_actions()
        # The last legal action is PASS; entered as "PASS"/"P", not by index.
        for i, action in enumerate(possible_moves[:-1]):
            print(f'{i}: {[CARD_STR[c] for c in action]}')
        while True:
            if game.turn == 0:
                # --- MCTS landlord's turn ---
                landlordAI = landlordAI.best_action()
                landlordAI.parent = None
                print(f'Landlord played a {landlordAI.parent_action.type}!')
                print(landlordAI.parent_action)
                input('Press anything to continue')
                game.move(landlordAI.parent_action)
                break
            else:
                # --- human's turn ---
                # NOTE(review): this span was redacted in the original
                # source and reconstructed from the sibling mains.
                move = input('Please enter your indexed move or enter PASS: ')
                if move == 'PASS' or move == 'P':
                    move = -1  # PASS is the last entry of possible_moves
                elif move.isnumeric() and int(move) < len(possible_moves):
                    move = int(move)
                else:
                    print('Invalid Move!')
                    continue
                move = possible_moves[move]
                play = Play(move)
                print(f'You played a {play.type}!')
                input('Press anything to continue')
                game.move(play)
                # BUG FIX: the original indexed landlordAI.children with the
                # card list (`move` was rebound above), which raised
                # TypeError into a bare except and rebuilt the tree every
                # turn. Reuse the explored subtree whose parent_action
                # matches the human's play instead.
                try:
                    landlordAI = next(
                        child for child in landlordAI.children
                        if list(child.parent_action.cards) == list(move))
                    landlordAI.parent = None
                except StopIteration:
                    # No matching explored child: restart the search from
                    # the current game position.
                    state = GameState(hands=game.hands+0,
                                      last_move=game.last_move,
                                      turn=game.turn,
                                      passes=game.passes)
                    landlordAI = MonteCarloTreeSearchNode(state, 0)
                break
        print('\n\n')
    print(f'Player {game.get_winner()} wins!')
def main():
    """Interactive game where each seat is one of several agent types or a
    human, as configured in `players`. A MonteCarlo seat reuses its search
    tree across turns where possible."""
    game = Game()
    state = Game(hands=game.hands + 0)  # +0 copies the hands array
    landlordAI = MonteCarloTreeSearchNode(state, 0)
    agent = PGAgent(learning_rate=0.01, device='cpu')
    Naive = NaiveGreedy()
    Random = RandomPlayer()
    Smart = SmartGreedy()
    load_model(agent.model, "PG_param.pth")
    # Available seat types; MonteCarlo can only play as landlord (seat 0).
    all_players = [
        "MonteCarlo", "PGAgent", "Naive", "Random", "Smart", "Human"
    ]
    players = ["Human", "PGAgent", "Smart"]
    while game.get_winner() == -1:
        player = game.turn
        print(f"PLAYER {game.turn}'s CARDS:")
        print(hand_to_string(game.hands[game.turn]))
        print("Your opponents hand sizes: ", end="")
        for i in range(3):
            if i != game.turn:
                print(sum(game.hands[i]), end=" ")
        print()
        if game.last_move is not None:
            print("The play to beat: ",
                  indices_to_string(game.last_move.cards))
        else:
            print("There is no play to beat")
        if players[player] == "Human":
            print("Legal Actions:")
            possible_moves = game.legal_actions()
            for i, action in enumerate(possible_moves):
                print(f'{i}: {indices_to_string(action)}')
        while True:
            if players[player] == "MonteCarlo":
                landlordAI = landlordAI.best_action()
                landlordAI.parent = None
                print(f"MonteCarlo played "
                      f"{indices_to_string(landlordAI.parent_action.cards)}!")
                input("Press anything to continue")
                game.move(landlordAI.parent_action)
                break
            elif players[player] in ("PGAgent", "Smart", "Naive", "Random"):
                # --- scripted / learned agents share one state interface ---
                player = game.turn
                hands = game.hands[player]
                last_move = game.last_move
                last_deal = [] if last_move is None else last_move.cards
                possible_moves = game.legal_actions()
                played_cards = game.played_cards
                # BUG FIX: previously `int(game.last_move == 0)`, which
                # compares a Play (or None) with 0 and is always 0; the
                # intended check uses last_move_player, as in
                # generate_transitions().
                is_last_deal_landlord = int(game.last_move_player == 0)
                is_landlord = int(game.turn == 0)
                if players[player] == "PGAgent":
                    agent.current_state(hands, last_deal, possible_moves,
                                        played_cards, is_landlord,
                                        is_last_deal_landlord)
                    action = Play(agent.play())
                    print(f"PGAgent played {indices_to_string(action.cards)}!")
                elif players[player] == "Smart":
                    Smart.current_state(hands, last_deal, possible_moves,
                                        played_cards, is_landlord,
                                        is_last_deal_landlord)
                    action = Play(Smart.play())
                    print(f"Smart Greedy played "
                          f"{indices_to_string(action.cards)}!")
                elif players[player] == "Naive":
                    Naive.current_state(hands, last_deal, possible_moves,
                                        played_cards, is_landlord,
                                        is_last_deal_landlord)
                    action = Play(Naive.play())
                    print(f"Naive Greedy played "
                          f"{indices_to_string(action.cards)}!")
                elif players[player] == "Random":
                    Random.current_state(hands, last_deal, possible_moves,
                                         played_cards, is_landlord,
                                         is_last_deal_landlord)
                    action = Play(Random.play())
                    print(f"Random played {indices_to_string(action.cards)}!")
                input("Press anything to continue")
                game.move(action)
                break
            else:
                # --- human's turn ---
                # NOTE(review): this span was redacted in the original
                # source and reconstructed from the sibling mains.
                move = input("Please enter your indexed move or enter PASS: ")
                if move == "PASS" or move == "P":
                    move = -1  # PASS is the last entry of possible_moves
                elif move.isnumeric() and int(move) < len(possible_moves):
                    move = int(move)
                else:
                    print('Invalid Move!')
                    continue
                move = possible_moves[move]
                play = Play(move)
                print(f"You played a {play.type}!")
                input("Press anything to continue")
                game.move(play)
                # BUG FIX: the original indexed landlordAI.children with the
                # card list (`move` was rebound above), raising TypeError
                # into a bare except and rebuilding the tree every turn.
                # Reuse the explored subtree matching the human's play.
                try:
                    landlordAI = next(
                        child for child in landlordAI.children
                        if list(child.parent_action.cards) == list(move))
                    landlordAI.parent = None
                except StopIteration:
                    # No matching explored child: restart the search from
                    # the current game position.
                    state = Game(hands=game.hands + 0,
                                 last_move=game.last_move,
                                 turn=game.turn,
                                 passes=game.passes)
                    landlordAI = MonteCarloTreeSearchNode(state, 0)
                break
        print("\n\n")
    print(f"Player {game.get_winner()}, {players[game.get_winner()]} wins!")