示例#1
0
class YahtzeeSingleEnv(Env):
    """Single-player Yahtzee environment backed by a ``Pyhtzee`` game.

    The action space is ``Discrete(44)``. An observation is a flat tuple of:
    round, sub-round, the five dice, the thirteen category scores, and the
    upper-section and Yahtzee bonuses (same order as ``observation_space``).
    """

    metadata = {'render.modes': ['human']}

    def __init__(self,
                 rule: Rule = Rule.YAHTZEE_FREE_CHOICE_JOKER,
                 game_type: GameType = GameType.RETRY_ON_WRONG_ACTION,
                 seed=None):
        """Create the environment and its underlying game.

        Args:
            rule: Rule variant; stored on the instance as ``self.rule``.
            game_type: Controls what ``step`` does on an invalid action
                (terminate the episode vs. ignore the step).
            seed: Forwarded to the initial ``Pyhtzee`` game.
        """
        self.pyhtzee = Pyhtzee(seed=seed)
        self.rule = rule
        self.game_type = game_type
        self.action_space = spaces.Discrete(44)
        # NOTE(review): every score slot has low=-1, presumably the value
        # get_score() reports for a category that is not scored yet - confirm.
        self.observation_space = spaces.Tuple((
            spaces.Discrete(13),  # round
            spaces.Discrete(4),  # sub-round
            spaces.Box(low=1, high=6, shape=(1,), dtype=np.uint8),  # die 1
            spaces.Box(low=1, high=6, shape=(1,), dtype=np.uint8),  # die 2
            spaces.Box(low=1, high=6, shape=(1,), dtype=np.uint8),  # die 3
            spaces.Box(low=1, high=6, shape=(1,), dtype=np.uint8),  # die 4
            spaces.Box(low=1, high=6, shape=(1,), dtype=np.uint8),  # die 5
            spaces.Box(low=-1, high=5, shape=(1,), dtype=np.int16),  # aces
            spaces.Box(low=-1, high=10, shape=(1,), dtype=np.int16),  # twos
            spaces.Box(low=-1, high=15, shape=(1,), dtype=np.int16),  # threes
            spaces.Box(low=-1, high=20, shape=(1,), dtype=np.int16),  # fours
            spaces.Box(low=-1, high=25, shape=(1,), dtype=np.int16),  # fives
            spaces.Box(low=-1, high=30, shape=(1,), dtype=np.int16),  # sixes
            spaces.Box(low=-1, high=30, shape=(1,), dtype=np.int16),  # three of a kind
            spaces.Box(low=-1, high=30, shape=(1,), dtype=np.int16),  # four of a kind
            spaces.Box(low=-1, high=25, shape=(1,), dtype=np.int16),  # full house
            spaces.Box(low=-1, high=30, shape=(1,), dtype=np.int16),  # small straight
            spaces.Box(low=-1, high=40, shape=(1,), dtype=np.int16),  # large straight
            spaces.Box(low=-1, high=30, shape=(1,), dtype=np.int16),  # chance
            spaces.Box(low=-1, high=50, shape=(1,), dtype=np.int16),  # yahtzee
            spaces.Box(low=-1, high=35, shape=(1,), dtype=np.int16),  # upper bonus
            spaces.Box(low=-1, high=1200, shape=(1,), dtype=np.int16),  # yahtzee bonus
        ))

    def get_observation_space(self):
        """Return the current observation as a flat tuple.

        Despite the name, this returns an observation (a sample of the game
        state), not a ``spaces`` object. The element order matches the
        layout declared in ``self.observation_space``.
        """
        game = self.pyhtzee
        # Order matters: it must line up with the Box entries declared in
        # __init__ (upper section, lower section, then the two bonuses).
        categories = (
            Category.ACES,
            Category.TWOS,
            Category.THREES,
            Category.FOURS,
            Category.FIVES,
            Category.SIXES,
            Category.THREE_OF_A_KIND,
            Category.FOUR_OF_A_KIND,
            Category.FULL_HOUSE,
            Category.SMALL_STRAIGHT,
            Category.LARGE_STRAIGHT,
            Category.CHANCE,
            Category.YAHTZEE,
            Category.UPPER_SECTION_BONUS,
            Category.YAHTZEE_BONUS,
        )
        return (
            game.round,
            game.sub_round,
            *(game.dice[i] for i in range(5)),
            *(get_score(game.scores.get(category)) for category in categories),
        )

    def sample_action(self):
        """Return an action sampled by the underlying game, logging it."""
        action = self.pyhtzee.sample_action()
        log.info(f'Sampled action: {action}')
        return action

    def step(self, action: int):
        """Apply ``action`` to the game.

        Args:
            action: Action identifier passed to ``Pyhtzee.take_action``.

        Returns:
            A tuple ``(observation, reward, finished, debug_info)`` where
            ``debug_info['valid_move']`` tells whether the action was
            accepted. If the game raises ``PyhtzeeException`` the reward is 0;
            the episode ends only when ``game_type`` is
            ``GameType.SUDDEN_DEATH``, otherwise the step is ignored.
        """
        pyhtzee = self.pyhtzee
        try:
            reward = pyhtzee.take_action(action)
            finished = pyhtzee.is_finished()
            valid_move = True
        except PyhtzeeException:
            valid_move = False
            reward = 0
            if self.game_type == GameType.SUDDEN_DEATH:
                log.info('Invalid action, terminating round.')
                finished = True
            else:  # retry on wrong action
                log.info('Invalid action, step ignored.')
                finished = False

        log.info(f'Finished step. Reward: {reward}, Finished: {finished}')
        debug_info = {
            'valid_move': valid_move,
        }
        return self.get_observation_space(), reward, finished, debug_info

    def reset(self):
        """Start a fresh game and return its initial observation.

        Returning the observation lets the usual ``obs = env.reset()``
        pattern work.
        """
        # NOTE(review): the new game is created without the seed given to
        # __init__, so later episodes are not tied to it - confirm that
        # this is the intended behaviour.
        self.pyhtzee = Pyhtzee()
        return self.get_observation_space()

    def render(self, mode='human', close=False):
        """Write a one-line summary (dice, round, total score) to stdout.

        ``mode`` and ``close`` are accepted for interface compatibility but
        are not used.
        """
        dice = self.pyhtzee.dice
        outfile = sys.stdout
        outfile.write(f'Dice: {dice[0]} {dice[1]} {dice[2]} {dice[3]} {dice[4]} '
                      f'Round: {self.pyhtzee.round}.{self.pyhtzee.sub_round} '
                      f'Score: {self.pyhtzee.get_total_score()}\n')
示例#2
0
 def test_sample_action(self):
     """A sampled action is among those reported by get_possible_actions()."""
     game = Pyhtzee()
     # Sample first, then query the possible actions, so both calls observe
     # the game in the same order as a player would.
     sampled = game.sample_action()
     possible_actions = game.get_possible_actions()
     self.assertIn(sampled, possible_actions)