def main(agent):
    """Interactive game loop: `agent` plays seat 0, humans play seats 1 and 2.

    Loops until Game.get_winner() reports a winner (!= -1), printing each
    player's hand, opponents' hand sizes, and the indexed legal actions.
    """
    game = Game()
    while game.get_winner() == -1:
        print(f"PLAYER {game.turn}'s CARDS:")
        print(game.hands[game.turn])
        print("Your opponents hand sizes: ", end="")
        for i in range(3):
            if i != game.turn:
                print(sum(game.hands[i]), end=" ")
        print()
        if game.last_move is not None:
            print("The play to beat: ", game.last_move.cards)
        else:
            print("There is no play to beat")
        print("Legal Actions:")
        possible_moves = game.legal_actions()
        # The last legal action is PASS; it is entered as "PASS"/"P",
        # not by index, so it is not listed here.
        for i, action in enumerate(possible_moves[:-1]):
            print(f'{i}: {action}')
        while True:
            if game.turn == 0:
                # --- agent's turn ---
                player = game.turn
                hands = game.hands[player]
                last_move = game.last_move
                last_deal = [] if last_move is None else last_move.cards
                possible_moves = game.legal_actions()
                agent.current_state(hands, last_deal, possible_moves)
                action = agent.play()
                print("Agent played", action)
                input("Press anything to continue")
                play = Play(action)
                game.move(play)
                break
            else:
                # --- human's turn ---
                # NOTE(review): this span was redacted in the original source
                # ("PASS: "******"PASS") and has been reconstructed from the
                # sibling mains; the original may have normalised input case.
                move = input("Please enter your indexed move or enter PASS: ")
                if move == "PASS" or move == "P":
                    move = -1  # PASS is kept as the last entry of possible_moves
                elif move.isnumeric() and int(move) < len(possible_moves):
                    move = int(move)
                else:
                    print('Invalid Move!')
                    continue
                move = possible_moves[move]
                play = Play(move)
                print(f"You played a {play.type}!")
                input("Press anything to continue")
                game.move(play)
                break
        print("\n\n")
    print(f"Player {game.get_winner()} wins!")
def expand(self):
    """Pop one untried action, simulate it, and attach the resulting child.

    Returns the newly created child node.
    """
    untried = self._untried_actions.pop()
    # simulate() returns the constructor parameters of the successor state.
    successor = Game(*self.state.simulate(Play(untried)))
    child = MonteCarloTreeSearchNode(
        successor,
        self.player,
        parent=self,
        parent_action=Play(untried),
    )
    self.children.append(child)
    return child
def play(self):
    """Greedy move selection.

    Prefers the action leaving the fewest distinct card ranks in hand,
    breaking ties by the average rank of the cards that would remain.
    Temporarily mutates self.hands while evaluating each action and
    restores it before moving on, so the hand is unchanged on return.
    """
    # print("\n")
    # print(self.hands)
    # print("last play", self.last_deal)
    best = np.count_nonzero(self.hands)  # distinct ranks currently held
    # Weighted sum of the hand (rank index * copies held).
    # NOTE(review): `sum` shadows the builtin for the rest of this method.
    sum = np.sum([i*j for i,j in enumerate(self.hands)])
    hand_size= np.sum(self.hands)  # total number of cards held
    best_arg = 0
    tie_break = 0
    if self.play_info.type == 'PASS':
        # Free to lead: play every copy of the lowest rank held with
        # fewer than 4 copies (quads are skipped).
        for i, num in enumerate(self.hands):
            if num>0 and num<4:
                #print([i for j in range(num)])
                return [i for j in range(num)]
    for i, action in enumerate(self.possible_actions):
        hand_sum = 0
        # Temporarily remove the action's cards from the hand.
        for card in action:
            hand_sum += card
            self.hands[card] -= 1
        num_cards = np.count_nonzero(self.hands)
        if num_cards == 0:
            # This action empties the hand: instant win, play it.
            for card in action:
                self.hands[card] += 1
            return action
        if num_cards < best:
            best = num_cards
            best_arg = i
            # Average rank of the cards that would remain after playing.
            tie_break = (sum - hand_sum)/(hand_size - len(action))
            # NOTE(review): the comment says bombs are punished, but the
            # condition penalises NON-bombs — confirm whether this was
            # meant to be == "bomb".
            if Play(action).type != "bomb": #punishment for using a bomb
                tie_break -= 3
        elif self.temp and num_cards == best:
            # Tied on distinct ranks: keep whichever action leaves the
            # higher-ranked remainder.
            new_tiebreak = (sum - hand_sum)/(hand_size - len(action))
            if Play(action).type != "bomb": #punishment for using a bomb
                new_tiebreak -= 3
            if new_tiebreak > tie_break and Play(action).type != "PASS":
                #print("comparing", self.possible_actions[best_arg], self.possible_actions[i])
                best_arg = i
                tie_break = new_tiebreak
        # Restore the hand before evaluating the next action.
        for card in action:
            self.hands[card] += 1
    # print(self.possible_actions[best_arg])
    # # print("tiebreak is ", tie_break)
    #print("play made", self.possible_actions[best_arg])
    return(self.possible_actions[best_arg])
def getValidMoves(self, board, player):
    """
    Input:
        board: current board
        player: current player

    Returns:
        validMoves: a binary vector of length self.getActionSize(), 1 for
                    moves that are valid from the current board and
                    player, 0 for invalid moves
    """
    # board[:14] holds the acting player's per-rank card counts.
    # NOTE(review): `player` is unused — presumably the board slice
    # already encodes the acting player's hand; confirm against caller.
    hand = np.array([board[:14]])
    # board[42:56] holds the per-rank counts of the play to beat; expand
    # it into a flat list of rank indices (one entry per copy), ranks
    # with the highest counts first.
    last_move = []
    for i in np.argsort(-board[42:56]):
        for _ in range(int(board[42:56][i])):
            last_move.append(i)
    # An empty list means there is no play to beat.
    last_move = Play(last_move) if last_move else None
    game = GameState(hands=hand, last_move=last_move)
    # Map each legal action (tuple of rank indices) to its action id.
    valid_actions = [
        self.encoded_actions[tuple(action)]
        for action in game.legal_actions()
    ]
    one_hot = np.zeros(self.getActionSize())
    one_hot[valid_actions] = 1
    return one_hot
def current_state2(self, info, possible_moves):
    """Decode a flat observation vector into the agent's cached state.

    info[:14]   — the agent's per-rank card counts (stored as self.hands).
    info[14:28] — per-rank counts of the last deal; expanded into
                  self.last_deal as a flat list of rank indices (one
                  entry per copy).
    """
    self.hands = info[:14]
    last_deal_counts = info[14:28]
    self.last_deal = []
    for card, count in enumerate(last_deal_counts):
        for _ in range(count):
            self.last_deal.append(card)
    # BUG FIX: Play() takes the expanded card-index list (as in
    # current_state and every other Play(action) call site), not the
    # 14-long count vector that was passed here before.
    self.play_info = Play(self.last_deal)
    self.possible_actions = possible_moves
def simulate(self):
    """Uniform-random rollout from this node's state.

    Returns 1 when the rollout winner is on this node's team (seat 0
    alone, or seats 1/2 together), else 0.
    """
    state = self.state
    # Play random legal moves until the game produces a winner.
    while state.get_winner() < 0:
        legal = state.legal_actions()
        choice = legal[np.random.randint(len(legal))]
        state = Game(*state.simulate(Play(choice)))
    winner = state.get_winner()
    if self.player == 0:
        return int(winner == 0)
    return int(winner in (1, 2))
def start_game(players, info=False, save_data=False):
    """Run one full game between `players` (one per seat); return the winner.

    save_data: append (state, action-index) pairs to the module-level
    `inputs` / `target` lists for supervised training.
    info: print per-move debug output, including a comparison between the
    supervised player's move and players[1]'s move for seat 0.
    """
    game = GameState()
    while game.get_winner() == -1:
        player = game.turn
        hands = game.hands[player]
        last_move = game.last_move
        last_deal = [] if last_move is None else last_move.cards
        possible_moves = game.legal_actions()
        played_cards = game.played_cards
        # NOTE(review): `game.last_move == 0` compares a Play object (or
        # None) with 0 and is therefore always 0; elsewhere
        # (generate_transitions) this flag is `game.last_move_player == 0`
        # — confirm GameState exposes last_move_player before changing.
        is_last_deal_landlord = int(game.last_move == 0)
        is_landlord = int(game.turn == 0)
        possible_move_indices = [inv_map[tuple(i)] for i in possible_moves]
        if save_data:
            inputs.append(game.get_player_state(player))
        if isinstance(players[player], Supervised):
            # Supervised players take the raw encoded state tensor and the
            # indices of the legal actions.
            action = G.decoded_actions[int(players[player].play(
                torch.FloatTensor(game.get_player_state(player)),
                possible_move_indices))]
            action = list(action)
        else:
            players[player].current_state(hands, last_deal, possible_moves,
                                          played_cards, is_landlord,
                                          is_last_deal_landlord)
            action = players[player].play()
        #print(action)
        if game.turn == 0 and info:
            # Debug: compare seat 0's move with what players[1] would play.
            print("supervised:", action)
            players[1].current_state(hands, last_deal, possible_moves,
                                     played_cards, is_landlord,
                                     is_last_deal_landlord)
            action2 = players[1].play()
            print("correct:", action2)
        #print(game.turn)
        if save_data:
            target.append(inv_map[tuple(action)])
        # print(action)
        # print(G.decoded_actions[inv_map[tuple(action)]])
        play = Play(action)
        if info:
            print(hands)
            print(last_deal)
            print(f'player {game.turn}:', action)
            print()
        game.move(play)
    if info:
        print(f"Player {game.get_winner()} wins!")
    return game.get_winner()
def generate_transitions(agent):
    """Self-play `num_of_games` games with `agent` playing every seat.

    Returns the list of GameTransition records from the last game, with
    terminal rewards back-filled once the winner is known.
    """
    win_reward = 1
    no_reward = 0
    lose_reward = -1
    num_of_games = 1
    for _ in range(num_of_games):
        game = Game()
        game_transitions = []
        while game.get_winner() == -1:
            player = game.turn
            hands = game.hands[player]
            last_move = game.last_move
            last_deal = [] if last_move is None else last_move.cards
            possible_moves = game.legal_actions()
            played_cards = game.played_cards
            # last_move_player = game.last_move_player
            # last_move_player = [int(last_move_player == i) for i in range(3)]
            # Whether the play to beat was made by the landlord (seat 0).
            is_last_deal_landlord = int(game.last_move_player == 0)
            is_landlord = int(game.turn == 0)
            agent.current_state(hands, last_deal, possible_moves,
                                played_cards, is_landlord,
                                is_last_deal_landlord)
            current_state, action, score = agent.deal()
            play = Play(action)
            game.move(play)
            if game.get_winner() == -1:
                # Game still running: record the move with no reward yet.
                gt = GameTransition(current_state, score, no_reward, None)
            else:
                # Game over: the seat that just moved emptied its hand and
                # wins (its transition is built below with win_reward).
                # The previous two transitions belong to the other two
                # seats, so their rewards are patched retroactively: the
                # winner's teammate also wins, opponents lose.
                # NOTE(review): assumes at least two transitions already
                # exist when the game ends (i.e. the game lasted three or
                # more moves) — confirm this always holds.
                # if landlord wins
                if game.get_winner() == 0:
                    game_transitions[-1].reward = lose_reward
                    game_transitions[-2].reward = lose_reward
                # if farmer 1 wins
                elif game.get_winner() == 1:
                    game_transitions[-1].reward = lose_reward
                    game_transitions[-2].reward = win_reward
                # if farmer 2 wins
                else:
                    game_transitions[-1].reward = win_reward
                    game_transitions[-2].reward = lose_reward
                gt = GameTransition(current_state, score, win_reward, None)
            game_transitions.append(gt)
    # print(f"Player {game.get_winner()} wins!")
    return game_transitions
def vs_mcts(agent, info=False):
    """Pit `agent` (landlord, seat 0) against two MCTS peasants.

    Each peasant owns its own search tree; after every move both trees are
    advanced (via landlordAI_move / best_action) so they stay in sync with
    the real game. Returns the winning seat.
    """
    game = Game()
    # `+ 0` copies the hands array so each searcher owns independent state.
    state1 = Game(hands=game.hands + 0)
    state2 = Game(hands=game.hands + 0)
    mcts_agent1 = MonteCarloTreeSearchNode(state1, 1)
    mcts_agent2 = MonteCarloTreeSearchNode(state2, 2)
    mcts = [mcts_agent1, mcts_agent2]
    if info:
        print('Game Start')
    while game.get_winner() == -1:
        if game.turn == 0:
            # --- landlord (the agent under test) moves ---
            player = game.turn
            hands = game.hands[player]
            last_move = game.last_move
            last_deal = [] if last_move is None else last_move.cards
            possible_moves = game.legal_actions()
            played_cards = game.played_cards
            # BUG FIX: previously `int(game.last_move == 0)`, which compares
            # a Play object (or None) with 0 and is always 0. The intended
            # flag — whether the landlord made the play to beat — uses
            # last_move_player, as in generate_transitions().
            is_last_deal_landlord = int(game.last_move_player == 0)
            is_landlord = int(game.turn == 0)
            agent.current_state(hands, last_deal, possible_moves,
                                played_cards, is_landlord,
                                is_last_deal_landlord)
            action = agent.play()
            play = Play(action)
            if info:
                print('player 0:', ' '.join([CARD_STR[a] for a in action]))
            game.move(play)
            # Advance both searchers' trees past the landlord's move.
            mcts[0] = landlordAI_move(mcts[0], game, action)
            mcts[1] = landlordAI_move(mcts[1], game, action)
        else:
            # --- one of the MCTS peasants moves ---
            mcts_id = game.turn - 1
            mcts_agent = mcts[mcts_id]
            mcts_agent = mcts_agent.best_action()
            # Detach the chosen child so it becomes the new search root.
            mcts_agent.parent = None
            move = mcts_agent.parent_action
            mcts[mcts_id] = mcts_agent
            game.move(move)
            # Keep the other searcher's tree in sync with this move.
            mcts[1 - mcts_id] = landlordAI_move(mcts[1 - mcts_id], game, move)
            #mcts[1] = landlordAI_move(mcts[1], game, move)
            if info:
                print(f'player {mcts_id + 1}:', move)
    if info:
        print(f"Player {game.get_winner()} wins!")
    return game.get_winner()
def start_game(players, info=False):
    """Play one full game, with players[seat] choosing each seat's moves.

    Returns the winning seat index.
    """
    game = Game()
    while game.get_winner() == -1:
        player = game.turn
        hands = game.hands[player]
        last_move = game.last_move
        last_deal = [] if last_move is None else last_move.cards
        possible_moves = game.legal_actions()
        played_cards = game.played_cards
        # BUG FIX: previously `int(game.last_move == 0)`, which compares a
        # Play object (or None) with 0 and is therefore always 0. The
        # intended flag — whether the landlord made the play to beat —
        # uses last_move_player, as in generate_transitions().
        is_last_deal_landlord = int(game.last_move_player == 0)
        is_landlord = int(game.turn == 0)
        players[player].current_state(hands, last_deal, possible_moves,
                                      played_cards, is_landlord,
                                      is_last_deal_landlord)
        action = players[player].play()
        #action = players[player].play(game.legal_actions(), player, game.hands[player], last_deal)
        play = Play(action)
        if info:
            print(f'player {game.turn}:', action)
        game.move(play)
    if info:
        print(f"Player {game.get_winner()} wins!")
    return game.get_winner()
def current_state(self, hands, last_deal, possible_moves, played_cards, is_landlord, is_last_deal_landlord):
    """Cache the observable game state for the next call to play().

    Note: is_landlord and is_last_deal_landlord are accepted for interface
    compatibility but not stored.
    """
    self.hands = hands
    self.last_deal = last_deal
    # Wrap the last deal so its play type (pair, bomb, PASS, ...) is known.
    self.play_info = Play(last_deal)
    self.played_cards = played_cards
    self.possible_actions = possible_moves
def main():
    """Train two DQN agents (landlord vs. peasants) by self-play and plot
    their running win rates over n_games games."""
    n_games = 1000
    gamma = 0.01       # discount factor
    epsilon = 0.8      # exploration rate
    lr = 0.001         # learning rate
    input_dims = 32    # size of the flattened player-state observation
    batch_size = 64
    n_actions = len(encoded_actions)
    LandlordAI = Agent(gamma, epsilon, lr, [input_dims], batch_size, n_actions)
    # A single agent plays both peasant seats (turns 1 and 2).
    PeasantAI = Agent(gamma, epsilon, lr, [input_dims], batch_size, n_actions)
    LandlordAI_wins = 0
    PeasantAI_wins = 0
    LandlordAI_winRates = []
    PeasantAI_winRates = []
    for i in range(n_games):
        if i % 50 == 0:
            print("game ", str(i))
        game = GameState()
        while game.get_winner() == -1:
            turn = game.turn
            observation = game.get_player_state(turn)
            possible_moves = game.legal_actions()
            possible_moves_indices = np.array(
                [encoded_actions[tuple(a)] for a in possible_moves])
            if turn == 0:
                # --- landlord's move ---
                action = LandlordAI.choose_action(observation, possible_moves_indices)
                game.move(Play(decoded_actions[action]))
                # Successor observation from the same seat's viewpoint.
                observation_ = game.get_player_state(turn)
                if game.get_winner() != -1:
                    if game.get_winner() == 0:
                        reward = 1
                        LandlordAI_wins += 1
                    else:
                        reward = -1
                    done = True
                else:
                    reward = 0
                    done = False
                # NOTE(review): only the agent making the game-ending move
                # stores a terminal reward; the other agent's last stored
                # transition keeps reward 0 — confirm this is intended.
                LandlordAI.store_transition(observation, action, reward,
                                            observation_, done)
                LandlordAI.learn()
            else:
                # --- peasant's move (seats 1 and 2 share PeasantAI) ---
                action = PeasantAI.choose_action(observation, possible_moves_indices)
                game.move(Play(decoded_actions[action]))
                observation_ = game.get_player_state(turn)
                if game.get_winner() != -1:
                    if game.get_winner() == 0:
                        reward = -1
                    else:
                        reward = 1
                        PeasantAI_wins += 1
                    done = True
                else:
                    reward = 0
                    done = False
                PeasantAI.store_transition(observation, action, reward,
                                           observation_, done)
                PeasantAI.learn()
        # Running win rates after each completed game.
        LandlordAI_winRates.append(LandlordAI_wins / (i + 1))
        PeasantAI_winRates.append(PeasantAI_wins / (i + 1))
    plt.plot(LandlordAI_winRates)
    plt.plot(PeasantAI_winRates)
    plt.legend(['Landlord (DQN)', 'Peasant (DQN)'])
    plt.title('Win Rate vs. Games Played')
    plt.savefig('Win Rate vs. Games Played (DQN Landlord, DQN Peasant).png')
    print("Landlord Final Win Rate: ", str(LandlordAI_winRates[-1]))
    print("Peasant Final Win Rate: ", str(PeasantAI_winRates[-1]))
def main():
    """Interactive game: MCTS agent plays the landlord (seat 0), humans
    play seats 1 and 2. The agent's search tree is reused across turns
    when the human's move matches an explored child."""
    game = GameState()
    state = GameState(hands=game.hands+0)  # +0 copies the hands array
    landlordAI = MonteCarloTreeSearchNode(state, 0)
    while game.get_winner() == -1:
        print(f'PLAYER {game.turn}\'s CARDS:')
        hand_str = ''
        for i, n in enumerate(game.hands[game.turn]):
            hand_str += ' '.join([CARD_STR[i]] * int(n)) + ' '
        print(hand_str)
        print('Your opponents hand sizes: ', end='')
        for i in range(3):
            if i != game.turn:
                print(sum(game.hands[i]), end=' ')
        print()
        if game.last_move is not None:
            print('The play to beat: ', game.last_move)
        else:
            print('There is no play to beat')
        print('Legal Actions:')
        possible_moves = game.legal_actions()
        # The last legal action is PASS; entered as "PASS"/"P", not by index.
        for i, action in enumerate(possible_moves[:-1]):
            print(f'{i}: {[CARD_STR[c] for c in action]}')
        while True:
            if game.turn == 0:
                # --- MCTS landlord's turn ---
                landlordAI = landlordAI.best_action()
                landlordAI.parent = None
                print(f'Landlord played a {landlordAI.parent_action.type}!')
                print(landlordAI.parent_action)
                input('Press anything to continue')
                game.move(landlordAI.parent_action)
                break
            else:
                # --- human's turn ---
                # NOTE(review): this span was redacted in the original
                # source and reconstructed from the sibling mains.
                move = input('Please enter your indexed move or enter PASS: ')
                if move == 'PASS' or move == 'P':
                    move = -1  # PASS is the last entry of possible_moves
                elif move.isnumeric() and int(move) < len(possible_moves):
                    move = int(move)
                else:
                    print('Invalid Move!')
                    continue
                move = possible_moves[move]
                play = Play(move)
                print(f'You played a {play.type}!')
                input('Press anything to continue')
                game.move(play)
                # BUG FIX: the original indexed landlordAI.children with the
                # card list (`move` was rebound above), which raised
                # TypeError into a bare except and rebuilt the tree every
                # turn. Reuse the explored subtree whose parent_action
                # matches the human's play instead.
                try:
                    landlordAI = next(
                        child for child in landlordAI.children
                        if list(child.parent_action.cards) == list(move))
                    landlordAI.parent = None
                except StopIteration:
                    # No matching explored child: restart the search from
                    # the current game position.
                    state = GameState(hands=game.hands+0,
                                      last_move=game.last_move,
                                      turn=game.turn,
                                      passes=game.passes)
                    landlordAI = MonteCarloTreeSearchNode(state, 0)
                break
        print('\n\n')
    print(f'Player {game.get_winner()} wins!')
def main():
    """Interactive game where each seat is one of several agent types or a
    human, as configured in `players`. A MonteCarlo seat reuses its search
    tree across turns where possible."""
    game = Game()
    state = Game(hands=game.hands + 0)  # +0 copies the hands array
    landlordAI = MonteCarloTreeSearchNode(state, 0)
    agent = PGAgent(learning_rate=0.01, device='cpu')
    Naive = NaiveGreedy()
    Random = RandomPlayer()
    Smart = SmartGreedy()
    load_model(agent.model, "PG_param.pth")
    # Available seat types; MonteCarlo can only play as landlord (seat 0).
    all_players = [
        "MonteCarlo", "PGAgent", "Naive", "Random", "Smart", "Human"
    ]
    players = ["Human", "PGAgent", "Smart"]
    while game.get_winner() == -1:
        player = game.turn
        print(f"PLAYER {game.turn}'s CARDS:")
        print(hand_to_string(game.hands[game.turn]))
        print("Your opponents hand sizes: ", end="")
        for i in range(3):
            if i != game.turn:
                print(sum(game.hands[i]), end=" ")
        print()
        if game.last_move is not None:
            print("The play to beat: ",
                  indices_to_string(game.last_move.cards))
        else:
            print("There is no play to beat")
        if players[player] == "Human":
            print("Legal Actions:")
            possible_moves = game.legal_actions()
            for i, action in enumerate(possible_moves):
                print(f'{i}: {indices_to_string(action)}')
        while True:
            if players[player] == "MonteCarlo":
                landlordAI = landlordAI.best_action()
                landlordAI.parent = None
                print(f"MonteCarlo played "
                      f"{indices_to_string(landlordAI.parent_action.cards)}!")
                input("Press anything to continue")
                game.move(landlordAI.parent_action)
                break
            elif players[player] in ("PGAgent", "Smart", "Naive", "Random"):
                # --- scripted / learned agents share one state interface ---
                player = game.turn
                hands = game.hands[player]
                last_move = game.last_move
                last_deal = [] if last_move is None else last_move.cards
                possible_moves = game.legal_actions()
                played_cards = game.played_cards
                # BUG FIX: previously `int(game.last_move == 0)`, which
                # compares a Play (or None) with 0 and is always 0; the
                # intended check uses last_move_player, as in
                # generate_transitions().
                is_last_deal_landlord = int(game.last_move_player == 0)
                is_landlord = int(game.turn == 0)
                if players[player] == "PGAgent":
                    agent.current_state(hands, last_deal, possible_moves,
                                        played_cards, is_landlord,
                                        is_last_deal_landlord)
                    action = Play(agent.play())
                    print(f"PGAgent played {indices_to_string(action.cards)}!")
                elif players[player] == "Smart":
                    Smart.current_state(hands, last_deal, possible_moves,
                                        played_cards, is_landlord,
                                        is_last_deal_landlord)
                    action = Play(Smart.play())
                    print(f"Smart Greedy played "
                          f"{indices_to_string(action.cards)}!")
                elif players[player] == "Naive":
                    Naive.current_state(hands, last_deal, possible_moves,
                                        played_cards, is_landlord,
                                        is_last_deal_landlord)
                    action = Play(Naive.play())
                    print(f"Naive Greedy played "
                          f"{indices_to_string(action.cards)}!")
                elif players[player] == "Random":
                    Random.current_state(hands, last_deal, possible_moves,
                                         played_cards, is_landlord,
                                         is_last_deal_landlord)
                    action = Play(Random.play())
                    print(f"Random played {indices_to_string(action.cards)}!")
                input("Press anything to continue")
                game.move(action)
                break
            else:
                # --- human's turn ---
                # NOTE(review): this span was redacted in the original
                # source and reconstructed from the sibling mains.
                move = input("Please enter your indexed move or enter PASS: ")
                if move == "PASS" or move == "P":
                    move = -1  # PASS is the last entry of possible_moves
                elif move.isnumeric() and int(move) < len(possible_moves):
                    move = int(move)
                else:
                    print('Invalid Move!')
                    continue
                move = possible_moves[move]
                play = Play(move)
                print(f"You played a {play.type}!")
                input("Press anything to continue")
                game.move(play)
                # BUG FIX: the original indexed landlordAI.children with the
                # card list (`move` was rebound above), raising TypeError
                # into a bare except and rebuilding the tree every turn.
                # Reuse the explored subtree matching the human's play.
                try:
                    landlordAI = next(
                        child for child in landlordAI.children
                        if list(child.parent_action.cards) == list(move))
                    landlordAI.parent = None
                except StopIteration:
                    # No matching explored child: restart the search from
                    # the current game position.
                    state = Game(hands=game.hands + 0,
                                 last_move=game.last_move,
                                 turn=game.turn,
                                 passes=game.passes)
                    landlordAI = MonteCarloTreeSearchNode(state, 0)
                break
        print("\n\n")
    print(f"Player {game.get_winner()}, {players[game.get_winner()]} wins!")