def _get_best_meld_cluster(self, player_id: int) -> List[List[Card]]:
    ''' Get the first best meld cluster for the cards held by a player.

    Args:
        player_id (int): id of the player whose held pile is examined

    Returns:
        (List[List[Card]]): the first entry returned by get_best_meld_clusters
            for the held cards, or an empty list when there are none
    '''
    held_cards = self.game_canvas.getter.get_held_pile_cards(player_id=player_id)
    candidate_clusters = get_best_meld_clusters(hand=held_cards)
    if not candidate_clusters:
        return []
    return candidate_clusters[0]
def get_payoff_gin_rummy_v1(player: GinRummyPlayer, game: 'GinRummyGame') -> float:
    ''' Get the payoff of player:
            a) 1 if player gins
            b) 0.2 if player knocks
            c) -deadwood_count / 100 otherwise

    The goal is to have the agent learn how to knock and gin.
    The negative payoff when the agent fails to knock or gin should encourage
    the agent to form melds.
    The payoff is scaled to lie between -1 and 1.

    Args:
        player (GinRummyPlayer): the player whose payoff is computed
        game (GinRummyGame): the game; its round supplies going_out_action
            and going_out_player_id

    Returns:
        payoff (float): payoff for player (the gin payoff is the int 1, which
            is acceptable wherever a float is expected)
    '''
    going_out_action = game.round.going_out_action
    going_out_player_id = game.round.going_out_player_id
    if going_out_player_id == player.player_id and type(going_out_action) is KnockAction:
        payoff = 0.2
    elif going_out_player_id == player.player_id and type(going_out_action) is GinAction:
        payoff = 1
    else:
        # The player did not go out by knocking or ginning: penalize by the
        # deadwood left after applying the first best meld cluster.
        hand = player.hand
        best_meld_clusters = melding.get_best_meld_clusters(hand=hand)
        best_meld_cluster = [] if not best_meld_clusters else best_meld_clusters[0]
        deadwood_count = utils.get_deadwood_count(hand, best_meld_cluster)
        payoff = -deadwood_count / 100
    return payoff
def score_player_0(self, action: ScoreNorthPlayerAction):
    ''' Record the score move of player 0 and pass the turn to player 1.

    When current_player takes ScoreNorthPlayerAction step, the move is
    recorded and executed; south becomes current player.

    Args:
        action (ScoreNorthPlayerAction): the scoring action being taken

    Raises:
        GinRummyProgramError: if current_player_id is not 0
    '''
    if self.current_player_id != 0:
        raise GinRummyProgramError("current_player_id is {}: should be 0.".format(self.current_player_id))
    north_player = self.get_current_player()
    clusters = melding.get_best_meld_clusters(hand=north_player.hand)
    meld_cluster = clusters[0] if clusters else []
    deadwood = utils.get_deadwood_count(hand=north_player.hand, meld_cluster=meld_cluster)
    score_move = ScoreNorthMove(player=north_player,
                                action=action,
                                best_meld_cluster=meld_cluster,
                                deadwood_count=deadwood)
    self.move_sheet.append(score_move)
    self.current_player_id = 1
def _get_payoff(self, player: GinRummyPlayer, game) -> float:
    ''' Get the payoff of player.

    Returns self._knock_reward when the player went out with a KnockAction,
    self._gin_reward when the player went out with a GinAction, and
    -deadwood_count / 100 otherwise.

    Args:
        player (GinRummyPlayer): the player whose payoff is computed
        game: the game; its round supplies going_out_action and
            going_out_player_id

    Returns:
        payoff (float): payoff for player
    '''
    going_out_action = game.round.going_out_action
    going_out_player_id = game.round.going_out_player_id
    if going_out_player_id == player.player_id:
        going_out_type = type(going_out_action)
        if going_out_type is KnockAction:
            return self._knock_reward
        if going_out_type is GinAction:
            return self._gin_reward
    # Player did not knock or gin: payoff is the scaled negative deadwood.
    clusters = melding.get_best_meld_clusters(hand=player.hand)
    meld_cluster = clusters[0] if clusters else []
    deadwood = utils.get_deadwood_count(player.hand, meld_cluster)
    return -deadwood / 100
def score_player_1(self, action: ScoreSouthPlayerAction):
    ''' Record the score move of player 1 and set self.is_over to True.

    Args:
        action (ScoreSouthPlayerAction): the scoring action being taken
    '''
    assert self.current_player_id == 1
    south_player = self.get_current_player()
    clusters = melding.get_best_meld_clusters(hand=south_player.hand)
    meld_cluster = clusters[0] if clusters else []
    deadwood = utils.get_deadwood_count(hand=south_player.hand, meld_cluster=meld_cluster)
    score_move = ScoreSouthMove(player=south_player,
                                action=action,
                                best_meld_cluster=meld_cluster,
                                deadwood_count=deadwood)
    self.move_sheet.append(score_move)
    self.is_over = True
def score_player_0(self, action: ScoreNorthPlayerAction):
    ''' Record the score move of player 0 and pass the turn to player 1.

    When current_player takes ScoreNorthPlayerAction step, the move is
    recorded and executed; south becomes current player.

    Args:
        action (ScoreNorthPlayerAction): the scoring action being taken
    '''
    assert self.current_player_id == 0
    north_player = self.get_current_player()
    clusters = melding.get_best_meld_clusters(hand=north_player.hand)
    meld_cluster = clusters[0] if clusters else []
    deadwood = utils.get_deadwood_count(hand=north_player.hand, meld_cluster=meld_cluster)
    score_move = ScoreNorthMove(player=north_player,
                                action=action,
                                best_meld_cluster=meld_cluster,
                                deadwood_count=deadwood)
    self.move_sheet.append(score_move)
    self.current_player_id = 1
def get_payoffs(self, game: GinRummyGame):
    ''' Get the payoffs of players.

    When self.get_payoff is set, it is called as get_payoff(player, game)
    for each player and its result is used as is. Otherwise the payoff is:
            a) 1 if gin
            b) 0.2 if knock
            c) -deadwood_count / 100 otherwise

    Args:
        game (GinRummyGame): a game whose round is over and whose last
            action is a ScoreSouthPlayerAction

    Returns:
        payoffs (list): a list of payoffs for each player

    Raises:
        Exception: in the fallback calculation, if the going out action is
            not a KnockAction, GinAction or DeclareDeadHandAction
    '''
    payoffs = [0, 0]
    game_round = game.round
    last_action = game.actions[-1]
    assert game_round.is_over
    assert type(last_action) is ScoreSouthPlayerAction
    going_out_action = game_round.going_out_action
    going_out_player_id = game_round.going_out_player_id
    for i in range(2):  # FIXME: 200213 simplified calculation
        player = game.round.players[i]
        if self.get_payoff:
            payoff = self.get_payoff(player, game)
        else:
            going_out_type = type(going_out_action)
            # Only an unrecognized going out action is an error. The opponent
            # of a player who knocked or ginned must fall through to the
            # deadwood payoff instead of raising.
            if going_out_type not in (KnockAction, GinAction, DeclareDeadHandAction):
                raise Exception("get_payoffs: ???")
            hand = player.hand
            best_meld_clusters = melding.get_best_meld_clusters(hand=hand)
            best_meld_cluster = [] if not best_meld_clusters else best_meld_clusters[0]
            deadwood_count = utils.get_deadwood_count(hand, best_meld_cluster)
            # FIXME: 200213 payoffs should be zeros for DeclareDeadHandAction
            payoff = -deadwood_count / 100
            if going_out_player_id == player.player_id:
                if going_out_type is KnockAction:
                    payoff = 0.2  # FIXME: 200213 simplified calculation
                elif going_out_type is GinAction:
                    payoff = 1  # FIXME: 200213 simplified calculation
        payoffs[i] = payoff
    return payoffs