def getValidMoves(self, board, player):
    """
    Input:
        board: current board
        player: current player

    Returns:
        validMoves: a binary vector of length self.getActionSize(), 1 for
                    moves that are valid from the current board and player,
                    0 for invalid moves
    """
    # The first 14 entries of the board vector encode the current player's hand.
    hand = np.array([board[:14]])

    # Entries 42:56 encode the card counts of the last move; rebuild the move
    # as a list of card indices, repeating each index by its count.
    last_move_counts = board[42:56]
    last_move = []
    for i in np.argsort(-last_move_counts):
        for _ in range(int(last_move_counts[i])):
            last_move.append(i)
    last_move = Play(last_move) if last_move else None

    game = GameState(hands=hand, last_move=last_move)
    valid_actions = [
        self.encoded_actions[tuple(action)] for action in game.legal_actions()
    ]

    # One-hot mask over the full action space.
    one_hot = np.zeros(self.getActionSize())
    one_hot[valid_actions] = 1
    return one_hot
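# The one-hot mask above is typically used to filter a policy network's output
# in AlphaZero-style frameworks. A minimal, hypothetical sketch of that usage
# (`pi` and `valids` are illustrative stand-ins, not names from this codebase):
import numpy as np

pi = np.random.rand(100)            # pretend policy-head output
valids = np.zeros(100)              # pretend validity mask from getValidMoves
valids[::7] = 1
masked = pi * valids                # zero out illegal moves
if masked.sum() > 0:
    masked /= masked.sum()          # renormalize over the legal moves
else:
    masked = valids / valids.sum()  # every legal move got probability 0: go uniform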
def simulate(self):
    # Random rollout: play uniformly random legal moves until the game ends.
    current_state = self.state
    while current_state.get_winner() < 0:
        possible_moves = current_state.legal_actions()
        action = possible_moves[np.random.randint(len(possible_moves))]
        state_params = current_state.simulate(Play(action))
        current_state = GameState(*state_params)

    # Reward is 1 if this node's player's side won: player 0 is the landlord,
    # players 1 and 2 are the peasant team.
    winner = current_state.get_winner()
    if self.player == 0:
        return int(winner == 0)
    return int(winner == 1 or winner == 2)
def start_game(players, info=False, save_data=False):
    game = GameState()
    while game.get_winner() == -1:
        player = game.turn
        hands = game.hands[player]
        last_move = game.last_move
        last_deal = [] if last_move is None else last_move.cards
        possible_moves = game.legal_actions()
        played_cards = game.played_cards
        # NOTE: `game.last_move == 0` compares a Play (or None) to 0, so this
        # flag is always 0; it presumably should test whether the landlord
        # made the last move.
        is_last_deal_landlord = int(game.last_move == 0)
        is_landlord = int(game.turn == 0)
        possible_move_indices = [inv_map[tuple(i)] for i in possible_moves]

        if save_data:
            inputs.append(game.get_player_state(player))

        if isinstance(players[player], Supervised):
            # The supervised model picks among the encoded legal actions.
            action = G.decoded_actions[int(players[player].play(
                torch.FloatTensor(game.get_player_state(player)),
                possible_move_indices))]
            action = list(action)
        else:
            players[player].current_state(hands, last_deal, possible_moves,
                                          played_cards, is_landlord,
                                          is_last_deal_landlord)
            action = players[player].play()

        if game.turn == 0 and info:
            # Compare the landlord's move against player 1's choice as a reference.
            print("supervised:", action)
            players[1].current_state(hands, last_deal, possible_moves,
                                     played_cards, is_landlord,
                                     is_last_deal_landlord)
            action2 = players[1].play()
            print("correct:", action2)

        if save_data:
            target.append(inv_map[tuple(action)])

        play = Play(action)
        if info:
            print(hands)
            print(last_deal)
            print(f'player {game.turn}:', action)
            print()
        game.move(play)

    if info:
        print(f"Player {game.get_winner()} wins!")
    return game.get_winner()
def expand(self):
    # Pop an untried action, apply it, and attach the resulting child node.
    action = self._untried_actions.pop()
    state_params = self.state.simulate(Play(action))
    next_state = GameState(*state_params)
    child_node = MonteCarloTreeSearchNode(next_state, self.player,
                                          parent=self,
                                          parent_action=Play(action))
    self.children.append(child_node)
    return child_node
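# expand() and simulate() above assume the node also records visit counts and
# rollout rewards, and that selection uses a UCT-style rule. A minimal sketch
# of those assumed pieces; the attribute names (_n_visits, _total_reward) and
# the best_child helper are illustrative guesses, not confirmed by this
# codebase, and expand()/simulate() are the methods shown above.
import numpy as np

class MonteCarloTreeSearchNodeSketch:
    def __init__(self, state, player, parent=None, parent_action=None):
        self.state = state
        self.player = player
        self.parent = parent
        self.parent_action = parent_action
        self.children = []
        self._n_visits = 0
        self._total_reward = 0.0
        self._untried_actions = state.legal_actions()

    def backpropagate(self, reward):
        # Push a rollout result from a leaf back up to the root.
        self._n_visits += 1
        self._total_reward += reward
        if self.parent is not None:
            self.parent.backpropagate(reward)

    def best_child(self, c=1.4):
        # Standard UCT: average reward (exploitation) plus an exploration
        # bonus that shrinks as a child is visited more often.
        scores = [
            child._total_reward / child._n_visits
            + c * np.sqrt(np.log(self._n_visits) / child._n_visits)
            for child in self.children
        ]
        return self.children[int(np.argmax(scores))]

    def best_action(self, n_simulations=100):
        # The classic MCTS loop: select, expand, simulate, backpropagate,
        # then return the best child to actually play.
        for _ in range(n_simulations):
            node = self
            while not node._untried_actions and node.children:
                node = node.best_child()
            if node._untried_actions:
                node = node.expand()
            node.backpropagate(node.simulate())
        return self.best_child(c=0)  # pure exploitation for the final move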
from doudizhu import GameState
import numpy as np
import pickle

# Give player 0 every card (four of each of the 13 ranks, plus the two jokers
# in the last slot) so that legal_actions() enumerates every possible play,
# then index the plays to build the action encoder.
game = GameState()
game.hands[0] = np.array([4] * 14)
game.hands[0, -1] = 2

possible_actions = game.legal_actions()
print(len(possible_actions))

possible_actions_t = [tuple(t) for t in possible_actions]
action_encoder = {action: i for i, action in enumerate(possible_actions_t)}

with open('action_encoder.pt', 'wb') as f:
    pickle.dump(action_encoder, f)
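# The other scripts also rely on the inverse table (decoded_actions / inv_map).
# Both lookups can be rebuilt from the pickle written above:
import pickle

with open('action_encoder.pt', 'rb') as f:
    action_encoder = pickle.load(f)

# index -> action tuple, the inverse of action -> index
action_decoder = {i: action for action, i in action_encoder.items()}

some_action = next(iter(action_encoder))
assert action_decoder[action_encoder[some_action]] == some_action  # round trip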
import numpy as np
import matplotlib.pyplot as plt
# Also assumed available from the project modules: GameState, Play, Agent,
# and the encoded_actions / decoded_actions lookup tables.

def main():
    n_games = 1000
    gamma = 0.01
    epsilon = 0.8
    lr = 0.001
    input_dims = 32
    batch_size = 64
    n_actions = len(encoded_actions)

    LandlordAI = Agent(gamma, epsilon, lr, [input_dims], batch_size, n_actions)
    PeasantAI = Agent(gamma, epsilon, lr, [input_dims], batch_size, n_actions)

    LandlordAI_wins = 0
    PeasantAI_wins = 0
    LandlordAI_winRates = []
    PeasantAI_winRates = []

    for i in range(n_games):
        if i % 50 == 0:
            print("game", i)
        game = GameState()
        while game.get_winner() == -1:
            turn = game.turn
            observation = game.get_player_state(turn)
            possible_moves = game.legal_actions()
            possible_moves_indices = np.array(
                [encoded_actions[tuple(a)] for a in possible_moves])

            if turn == 0:
                action = LandlordAI.choose_action(observation,
                                                  possible_moves_indices)
                game.move(Play(decoded_actions[action]))
                observation_ = game.get_player_state(turn)
                if game.get_winner() != -1:
                    # Terminal: +1 if the landlord won, -1 if the peasants did.
                    if game.get_winner() == 0:
                        reward = 1
                        LandlordAI_wins += 1
                    else:
                        reward = -1
                    done = True
                else:
                    reward = 0
                    done = False
                LandlordAI.store_transition(observation, action, reward,
                                            observation_, done)
                LandlordAI.learn()
            else:
                action = PeasantAI.choose_action(observation,
                                                 possible_moves_indices)
                game.move(Play(decoded_actions[action]))
                observation_ = game.get_player_state(turn)
                if game.get_winner() != -1:
                    # Peasants (players 1 and 2) share one agent and one reward.
                    if game.get_winner() == 0:
                        reward = -1
                    else:
                        reward = 1
                        PeasantAI_wins += 1
                    done = True
                else:
                    reward = 0
                    done = False
                PeasantAI.store_transition(observation, action, reward,
                                           observation_, done)
                PeasantAI.learn()

        LandlordAI_winRates.append(LandlordAI_wins / (i + 1))
        PeasantAI_winRates.append(PeasantAI_wins / (i + 1))

    plt.plot(LandlordAI_winRates)
    plt.plot(PeasantAI_winRates)
    plt.legend(['Landlord (DQN)', 'Peasant (DQN)'])
    plt.title('Win Rate vs. Games Played')
    plt.savefig('Win Rate vs. Games Played (DQN Landlord, DQN Peasant).png')
    print("Landlord Final Win Rate:", LandlordAI_winRates[-1])
    print("Peasant Final Win Rate:", PeasantAI_winRates[-1])
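# The training loop passes possible_moves_indices into choose_action, so the
# agent presumably does epsilon-greedy selection restricted to legal actions.
# A minimal sketch of such a method under that assumption; the Q_eval network
# and epsilon attribute names are guesses, not confirmed by this codebase.
import numpy as np
import torch

def choose_action(self, observation, legal_indices):
    if np.random.random() < self.epsilon:
        # Explore: uniform choice over the legal actions only.
        return int(np.random.choice(legal_indices))
    # Exploit: argmax of the Q-network's estimates over legal actions only.
    with torch.no_grad():
        state = torch.FloatTensor(observation).unsqueeze(0)
        q_values = self.Q_eval(state).squeeze(0).numpy()
    return int(legal_indices[np.argmax(q_values[legal_indices])])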
def main():
    game = GameState()
    state = GameState(hands=game.hands + 0)  # +0 copies the hands array
    landlordAI = MonteCarloTreeSearchNode(state, 0)

    while game.get_winner() == -1:
        print(f'PLAYER {game.turn}\'s CARDS:')
        hand_str = ''
        for i, n in enumerate(game.hands[game.turn]):
            hand_str += ' '.join([CARD_STR[i]] * int(n)) + ' '
        print(hand_str)

        print('Your opponents\' hand sizes: ', end='')
        for i in range(3):
            if i != game.turn:
                print(sum(game.hands[i]), end=' ')
        print()

        if game.last_move is not None:
            print('The play to beat: ', game.last_move)
        else:
            print('There is no play to beat')

        print('Legal Actions:')
        possible_moves = game.legal_actions()
        # The last legal action is the pass move, so it is not enumerated here.
        for i, action in enumerate(possible_moves[:-1]):
            print(f'{i}: {[CARD_STR[c] for c in action]}')

        while True:
            if game.turn == 0:
                landlordAI = landlordAI.best_action()
                landlordAI.parent = None
                print(f'Landlord played a {landlordAI.parent_action.type}!')
                print(landlordAI.parent_action)
                input('Press Enter to continue')
                game.move(landlordAI.parent_action)
                break
            else:
                move = input('Please enter your indexed move or enter PASS: ')
                if move == 'PASS' or move == 'P':
                    move = -1
                elif move.isnumeric() and int(move) < len(possible_moves):
                    move = int(move)
                else:
                    print('Invalid Move!')
                    continue
                move = possible_moves[move]
                play = Play(move)
                print(f'You played a {play.type}!')
                input('Press Enter to continue')
                game.move(play)
                # Descend into the child matching the human's move so the
                # search tree is reused; if it was never expanded, rebuild the
                # tree from the current game state. (The original indexed
                # children by the move itself, which always failed and forced
                # a rebuild.)
                child = next((c for c in landlordAI.children
                              if tuple(c.parent_action.cards) == tuple(play.cards)),
                             None)
                if child is not None:
                    landlordAI = child
                    landlordAI.parent = None
                else:
                    state = GameState(hands=game.hands + 0,
                                      last_move=game.last_move,
                                      turn=game.turn, passes=game.passes)
                    landlordAI = MonteCarloTreeSearchNode(state, 0)
                break

        print('\n\n')

    print(f'Player {game.get_winner()} wins!')