def winner(self):
    """
    Who won the game (if it's finished).

    :return: A tuple (winner, points): the (integer) id of the player who won (1 or 2)
        and the number of game points awarded (1, 2 or 3). (None, None) if the game
        is not finished.
    """
    winner = None
    points = None

    if self.__revoked is not None:  # Thanks: Joshua Kenyon
        return util.other(self.__revoked), 3

    if self.__p1_points >= 66:
        winner = 1
    elif self.__p2_points >= 66:
        winner = 2

    # Only compute game points once a winner exists; otherwise return
    # (None, None) as promised by the docstring.
    if winner is not None:
        other_player_points = self.get_points(util.other(winner))

        if other_player_points == 0:
            points = 3
        elif other_player_points < 33:
            points = 2
        else:
            points = 1

    return winner, points
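# A minimal sketch of the util.other helper that winner() and the other
# snippets in this section rely on: it maps one player id to the other under
# the framework's two-player 1/2 convention. The real helper lives in the
# framework's util module; treat this body as an assumption.
def other(player_id):
    return 1 if player_id == 2 else 2

assert other(1) == 2 and other(2) == 1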
def heuristic_1a(self, player: int, depth: int, curr_state: State) -> float:
    # The opponent's share of all points scored so far, guarded against a 0/0 division.
    total = curr_state.get_points(util.other(player)) + curr_state.get_points(player)
    if total == 0:
        return 0
    return curr_state.get_points(util.other(player)) / total
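# Worked example of the normalisation in heuristic_1a (illustrative numbers,
# not taken from a real game): with the opponent on 40 points and the player
# on 20, the heuristic is the opponent's share of all points scored so far,
# a value in [0, 1].
opponent_points, player_points = 40, 20
assert abs(opponent_points / (opponent_points + player_points) - 2 / 3) < 1e-9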
def __add_partial_trick_to_perspective(self, trick, player):
    """
    Adds the card in the trick to the specified player's perspective

    :param trick: A tuple signifying the trick whose cards are revealed to the player
    :param player: An integer signifying the player id
    """
    if player == 1:
        self.__p1_perspective[trick[util.other(player) - 1]] = "P2H"
    else:
        self.__p2_perspective[trick[util.other(player) - 1]] = "P1H"
def get_move(self, state):
    # Find out which player we are
    my_id = state.whose_turn()

    # Our move: these will contain Planet objects
    source = None
    dest = None

    # The source score must end up as large as possible (start with a very low value)
    source_strength = -1
    # The destination score must end up as small as possible (start with a high value)
    dest_strength = float('inf')

    # Find my strongest planet (largest number of stationed ships)
    for mine in state.planets(my_id):
        strength = state.garrison(mine)
        if strength > 1 and strength > source_strength:
            source_strength = strength
            source = mine

    # Find the weakest enemy or neutral planet (smallest number of ships)
    for his in state.planets(u.other(my_id)) + state.planets(0):
        strength = state.garrison(his)
        if strength < dest_strength:
            dest_strength = strength
            dest = his

    if source is None or dest is None:
        return None

    return source.id(), dest.id()
def __evaluate_trick(self, trick):
    """
    Evaluates who the winner of the specified trick is and returns it

    :param trick: A tuple signifying the trick which is evaluated
    :return: The winner's id as an integer
    """
    if len(trick) != 2:
        raise RuntimeError("Incorrect trick format. Tuple of length 2 needed.")

    if trick[0] is None or trick[1] is None:
        raise RuntimeError("An incomplete trick was attempted to be evaluated.")

    # If the two cards of the trick have the same suit
    if Deck.get_suit(trick[0]) == Deck.get_suit(trick[1]):
        # We only compare indices, since the convention we defined in Deck
        # puts higher-ranked cards at lower indices within the same suit.
        return 1 if trick[0] < trick[1] else 2

    if Deck.get_suit(trick[0]) == self.__deck.get_trump_suit():
        return 1

    if Deck.get_suit(trick[1]) == self.__deck.get_trump_suit():
        return 2

    # If the control flow has reached this point, the trick consists of two
    # different non-trump cards. Since the new leader is determined by the
    # output of this function, at this point the state object still considers
    # it to be the non-leading player's turn. Thus, we determine that the winner
    # is the other player, i.e. the leading player. Thanks: Daan Raven
    return util.other(self.whose_turn())
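# Illustration of the index convention __evaluate_trick relies on: within one
# suit, Deck stores higher-ranked cards at lower indices, so a plain "<"
# comparison decides the trick. The concrete indices below are hypothetical.
ace_index, ten_index = 0, 1        # same suit; the ace outranks the ten
trick = (ace_index, ten_index)     # the leader played the ace
assert (1 if trick[0] < trick[1] else 2) == 1   # the leader wins the trick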
def winning_game(self, state):
    player = state.whose_turn()
    opponent = util.other(player)
    player_points = state.get_points(player)
    opponent_points = state.get_points(opponent)
    return opponent_points < player_points
def expand(self):
    player = self.state.whose_turn()
    moves = self.state.moves()
    for move in moves:
        new_state = self.state.next(move)
        child_node = Node(new_state, self, move, [], util.other(player))
        self.children.append(child_node)
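# Hedged sketch of the Node constructor implied by expand() above; the real
# class is defined elsewhere in this codebase. The argument order
# (state, parent, move, children, player) is inferred from the call site and
# should be treated as an assumption.
class NodeSketch:
    def __init__(self, state, parent, move, children, player):
        self.state = state        # game state this node represents
        self.parent = parent      # parent node (None at the root)
        self.move = move          # move that led from the parent to this node
        self.children = children  # expanded child nodes
        self.player = player      # player to move in this state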
def action_cost(self, player, depth, curr_state) -> float:

    def backward_cost():
        eval_vec = [self.heuristic_1a(player, depth, curr_state)]
        return self.heuristics_eval(eval_vec)

    def forward_cost():
        eval_vec = [self.heuristic_2a(player, depth, curr_state)]
        return self.heuristics_eval(eval_vec)

    # Terminal positions short-circuit the heuristics entirely.
    if self.__WIN_SCORE <= curr_state.get_points(util.other(player)):
        return -1
    elif self.__WIN_SCORE <= curr_state.get_points(player):
        return 1

    return (backward_cost() + forward_cost()) / 2
def action_cost(self, player, depth, state) -> float:

    def backward_cost():
        # The opponent's score, normalised by the winning threshold.
        return state.get_points(util.other(player)) / self.__WIN_SCORE

    def forward_cost():
        # How far the player still is from the win score; an underestimate, can be negative.
        return (self.__WIN_SCORE - state.get_points(player)) / self.__WIN_SCORE

    if self.__WIN_SCORE <= state.get_points(util.other(player)):
        return -1
    elif self.__WIN_SCORE <= state.get_points(player):
        return 1

    return (backward_cost() + forward_cost()) / 2
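# Worked example of the cost combination above (illustrative values, assuming
# __WIN_SCORE == 66 as in Schnapsen): opponent on 20 points, player on 30.
#   backward_cost = 20 / 66           (how close the opponent is to winning)
#   forward_cost  = (66 - 30) / 66    (how far the player still has to go)
# The action cost averages the two: (20/66 + 36/66) / 2 = 28/66, roughly 0.424.
WIN_SCORE = 66
backward = 20 / WIN_SCORE
forward = (WIN_SCORE - 30) / WIN_SCORE
assert abs((backward + forward) / 2 - 28 / 66) < 1e-12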
def cant_win_trick(self, state):
    player = state.whose_turn()
    opponent = util.other(player)
    leader = state.leader()

    # Only meaningful when the opponent leads, i.e. their card is already on the table.
    if opponent == leader:
        # The trick is only unwinnable if *no* available move leaves the player
        # ahead on points; returning True on the first bad move would misreport
        # positions where a different move still wins the trick.
        for move in state.moves():
            next_state = state.next(move)
            points_gained = next_state.get_points(player) - next_state.get_points(opponent)
            if points_gained > 0:
                return False
        return True

    return False
from itertools import chain  # needed below to flatten the one-hot perspective


def features(state):
    # type: (State) -> tuple[float, ...]
    """
    Extract features from this state. Remember that every feature vector returned
    should have the same length.

    :param state: A state to be converted to a feature vector
    :return: A list of floats: a feature vector representing this state.
    """
    feature_set = []

    # The current player's points
    p1_points = state.get_points(state.whose_turn())

    # The opponent's points
    p2_points = state.get_points(util.other(state.whose_turn()))

    # The current player's pending points
    p1_pending_points = state.get_pending_points(state.whose_turn())

    # The opponent's pending points
    p2_pending_points = state.get_pending_points(util.other(state.whose_turn()))

    # Get the trump suit
    trump_suit = state.get_trump_suit()

    # Get the phase
    phase = state.get_phase()

    # Get the stock size
    stock_size = state.get_stock_size()

    # Whether the current player leads the trick (1) or follows (2)
    leader = 1 if state.leader() == state.whose_turn() else 2

    # Add whose turn it is to the feature set
    # whose_turn = state.whose_turn()

    # The opponent's played card, if any
    opponents_played_card = state.get_opponents_played_card()

    ################## You do not need to do anything below this line ########################

    # Correct the perspective labels depending on whether the current player is 1 or 2
    player_hand = 'P1H'
    player_win = 'P1W'
    other_hand = 'P2H'
    other_win = 'P2W'

    if state.whose_turn() == 2:
        player_hand = 'P2H'
        player_win = 'P2W'
        other_hand = 'P1H'
        other_win = 'P1W'

    perspective = state.get_perspective()

    # Perform one-hot encoding on the perspective.
    # Learn more about one-hot here: https://machinelearningmastery.com/how-to-one-hot-encode-sequence-data-in-python/
    perspective = [card if card != 'U' else [1, 0, 0, 0, 0, 0] for card in perspective]
    perspective = [card if card != 'S' else [0, 1, 0, 0, 0, 0] for card in perspective]
    perspective = [card if card != player_hand else [0, 0, 1, 0, 0, 0] for card in perspective]
    perspective = [card if card != other_hand else [0, 0, 0, 1, 0, 0] for card in perspective]
    perspective = [card if card != player_win else [0, 0, 0, 0, 1, 0] for card in perspective]
    perspective = [card if card != other_win else [0, 0, 0, 0, 0, 1] for card in perspective]

    # Append the one-hot encoded perspective to the feature set
    feature_set += list(chain(*perspective))

    # Append the normalized points to the feature set
    total_points = p1_points + p2_points
    feature_set.append(p1_points / total_points if total_points > 0 else 0.)
    feature_set.append(p2_points / total_points if total_points > 0 else 0.)

    # Append the normalized pending points to the feature set
    total_pending_points = p1_pending_points + p2_pending_points
    feature_set.append(p1_pending_points / total_pending_points if total_pending_points > 0 else 0.)
    feature_set.append(p2_pending_points / total_pending_points if total_pending_points > 0 else 0.)

    # Convert the trump suit to a one-hot encoding and add it to the feature set.
    # You don't need to add anything to this part.
    suits = ["C", "D", "H", "S"]
    trump_suit_onehot = [0, 0, 0, 0]
    trump_suit_onehot[suits.index(trump_suit)] = 1
    feature_set += trump_suit_onehot

    # Append the one-hot encoded phase to the feature set
    feature_set += [1, 0] if phase == 1 else [0, 1]

    # Append the normalized stock size to the feature set
    feature_set.append(stock_size / 10)

    # Append the one-hot encoded leader to the feature set
    feature_set += [1, 0] if leader == 1 else [0, 1]

    # Append the one-hot encoded whose_turn to the feature set
    # feature_set += [1, 0] if whose_turn == 1 else [0, 1]

    # Append the one-hot encoded opponent's card to the feature set
    # (index 20 encodes "no card played yet")
    opponents_played_card_onehot = [0] * 21
    opponents_played_card_onehot[opponents_played_card if opponents_played_card is not None else 20] = 1
    feature_set += opponents_played_card_onehot

    # Return the feature set
    return feature_set
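# Rough sanity check on the length of the vector features() produces, assuming
# the 20-card Schnapsen deck: 20 cards x 6 one-hot slots for the perspective,
# plus 2 point shares, 2 pending-point shares, 4 trump-suit slots, 2 phase
# slots, 1 stock-size entry, 2 leader slots and 21 opponent-card slots.
EXPECTED_FEATURE_LENGTH = 20 * 6 + 2 + 2 + 4 + 2 + 1 + 2 + 21
assert EXPECTED_FEATURE_LENGTH == 154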
def next(self,
         move  # type: tuple(int, int)
         ):
    """
    Computes the next state based on the given move

    :param move: Tuple of length 2 of which each element can either be an int or None
    :return: Newly computed state based on current state and given move
    """
    if self.__signature is not None and self.__signature != self.whose_turn():
        raise RuntimeError('\n\nGame is in phase 1. Cannot view next state with imperfect information. Try making an assumption first.\n')

    if self.finished():
        raise RuntimeError('Gamestate is finished. No next states exist.')

    # Start with a copy of the current state
    state = self.clone()  # type: State

    # If we find an invalid move, we set the __revoked class variable
    # to the pid of the player who made the incorrect move, and return the state as is.
    if not state.__is_valid(move):
        state.__revoked = state.whose_turn()
        return state

    # If the move is a trump exchange
    if move[0] is None:
        # Store the indices we need in variables
        trump_jack_index = move[1]
        trump_card_index = state.__deck.get_trump_card_index()

        # Perform the trump jack exchange; the perspective is updated inside the function
        state.__exchange_trump(trump_jack_index)
        return state

    # Change turns
    state.__leads_turn = not state.__leads_turn

    # Add the given move to the trick, store the whole trick in a variable
    trick = state.__deck.set_trick(state.whose_turn(), move[0])

    # At this point, we know that the move is not a trump jack exchange.
    # Check if this move is a marriage
    if move[1] is not None:

        # A marriage cannot be melded by the non-leading player
        if state.__leads_turn:
            raise RuntimeError("Marriage was attempted to be melded by non-leading player")

        # Update the perspective, since an additional card is revealed by the player who melds a marriage.
        state.__deck.add_to_perspective(util.other(state.whose_turn()), move[1], "P" + str(state.whose_turn()) + "H")

        # A trump-suit marriage yields 40 points, a regular one 20, to be awarded at the next trick win.
        if Deck.get_suit(move[1]) == state.__deck.get_trump_suit():
            state.__reserve_pending_points(state.whose_turn(), 40)
        else:
            state.__reserve_pending_points(state.whose_turn(), 20)

    # If it is not the lead's turn, i.e. currently the trick is
    # incomplete and we already know it's not a trump jack exchange
    if not state.__leads_turn:
        other = state.whose_turn()
        state.__player1s_turn = not state.__player1s_turn
        state.__deck.add_to_perspective(state.whose_turn(), trick[other - 1], "P" + str(other) + "H")
        return state

    # At this point we know that it is the lead's turn and that a complete
    # trick from the previous hand can be evaluated.

    # Evaluate the trick and store the winner in the leader variable
    leader = state.__evaluate_trick(trick)

    state.__allocate_trick_points(leader, trick)
    state.__deck.put_trick_away(leader)

    # If all cards are exhausted, the winner of the last trick wins the game
    if state.__phase == 2 and len(state.hand()) == 0 and not state.finished():
        state.__set_points(leader, 66)

    # Draw cards from the stock
    if state.__phase == 1:
        state.__deck.draw_card(leader)
        state.__deck.draw_card(util.other(leader))

        if state.__deck.get_stock_size() == 0:
            state.__phase = 2

    # Set player1s_turn according to the leader variable
    state.__player1s_turn = True if leader == 1 else False

    return state
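# Hedged usage sketch of next(): stepping a game forward one ply at a time.
# State.generate is assumed to be the framework's factory for a fresh game
# (its exact signature is an assumption here); moves(), finished() and
# winner() all appear in the snippets in this section.
def play_first_legal_moves():
    state = State.generate()                     # assumed factory for a fresh game
    while not state.finished():
        state = state.next(state.moves()[0])     # always play the first legal move
    return state.winner()                        # (winner_id, game_points)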
def get_opponents_played_card(self):
    """
    :return: An integer representing the index of the card the opponent has played,
        None if no card played
    """
    return self.__deck.get_trick()[util.other(self.whose_turn()) - 1]
def heuristic_3b(self, player: int, depth: int, curr_state: State) -> float:
    # The player's share of all points scored so far; guard against a 0/0
    # division, mirroring heuristic_1a.
    total = curr_state.get_points(util.other(player)) + curr_state.get_points(player)
    if total == 0:
        return 0
    return curr_state.get_points(player) / total
def heuristic_1b(self, player: int, depth: int, curr_state: State) -> float:
    # The opponent's share of the pending (marriage) points; guard against a
    # 0/0 division, mirroring heuristic_1a, since both are frequently zero.
    total = curr_state.get_pending_points(util.other(player)) + curr_state.get_pending_points(player)
    if total == 0:
        return 0
    return curr_state.get_pending_points(util.other(player)) / total
dqn_number = random.choice([1, 2])
n_epi += 1

while not state.finished():
    given_state = state.clone(signature=state.whose_turn()) if state.get_phase() == 1 else state
    action = (None, None)
    n_steps += 1

    if state.whose_turn() == dqn_number:
        action = e_greedy(given_state)
        state = state.next(action)

    while state.whose_turn() == util.other(dqn_number):
        state2 = state.clone(signature=state.whose_turn()) if state.get_phase() == 1 else state
        if state.finished():
            break
        move = opponent.get_move(state2)
        state = state.next(move)

    reward = 0
    if state.finished():
        winner, score = state.winner()
        reward = 1 if winner == dqn_number else 0
        win_history.append(reward)
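# Hedged sketch of the e_greedy helper used in the training loop above; the
# real implementation is defined elsewhere in this codebase. With probability
# epsilon it explores a random legal move, otherwise it exploits the move the
# DQN values highest. q_value_of is a hypothetical placeholder for the
# network's forward pass, not a framework function.
import random

def q_value_of(state, move):
    # Hypothetical stand-in for the DQN forward pass; random here so the
    # sketch stays self-contained.
    return random.random()

def e_greedy_sketch(state, epsilon=0.1):
    moves = state.moves()
    if random.random() < epsilon:
        return random.choice(moves)                        # explore
    return max(moves, key=lambda m: q_value_of(state, m))  # exploit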
def evaluate(self, node):
    my_points = node.state.get_points(self.get_me())
    opponent_points = node.state.get_points(util.other(self.get_me()))
    return 1 if my_points - opponent_points > 0 else 0
def value(self):
    wins = self.outcome[self.parent.state.whose_turn()]
    loses = self.outcome[util.other(self.parent.state.whose_turn())]
    return wins - loses
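# Illustrative check of value() with hypothetical outcome counts: if the
# player to move at the parent node won 7 of the simulations recorded here
# and lost 3, the node's value is 7 - 3 = 4.
outcome = {1: 7, 2: 3}   # hypothetical: player 1 to move at the parent
assert outcome[1] - outcome[2] == 4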