Exemplo n.º 1
0
    def test_bucket_small_04(self):
        """Replay a short hand and print the small bucket at each step.

        Nothing is asserted here; the joined bucket strings are printed so
        they can be inspected by hand. The hand being replayed is:

            Round #4, B (93), A (-93)
            B posts the blind of 1
            A posts the blind of 2
            B dealt Qs 8c [Ts Jc]
            A dealt Js Kh [4s Ah]
            B calls
            A checks
            Flop 7d 2h 5s [Qd 2h 6s], B (2), A (2)
            A checks
            B bets 2
        """
        def show(view):
            # Bucket the infoset and print its joined string form.
            print(bucket_small_join(bucket_small(view)))

        # Index 1 is handed to create_new_round as the small blind.
        small_blind = 1
        state = create_new_round(small_blind)

        # NOTE(review): the third make_infoset argument is True exactly when
        # the player index equals the small blind index in these calls --
        # presumably an "is small blind" flag; confirm against make_infoset.
        print("Start of preflop, no actions yet")
        show(make_infoset(state, 1, True))

        state = state.proceed(CallAction())
        print("SB called on preflop")
        show(make_infoset(state, 0, False))

        state = state.proceed(CheckAction())
        print("BB checked on preflop, first action of flop")
        show(make_infoset(state, 0, False))

        state = state.proceed(CheckAction())
        print("BB checked on flop")
        show(make_infoset(state, 1, True))

        state = state.proceed(RaiseAction(2))
        print("SB bet 2")
        show(make_infoset(state, 0, False))
Exemplo n.º 2
0
    def test_exceed_action_limit(self):
        """Check the bet history vector after more raises than it has room for.

        Index 1 is the small blind. After a preflop call and check, eight
        bets/raises (2, 4, ..., 16) and a final call are applied; per the
        original author's note this exceeds the maximum of 6 actions. The
        bucket string is printed at several points, and the final infoset's
        ``bet_history_vec`` is compared against a fixed expected tensor.
        """
        def show(view):
            # Bucket the infoset and print its joined string form.
            print(bucket_small_join(bucket_small(view)))

        # P2 (index 1) is the small blind: SB calls, then BB checks.
        state = create_new_round(1)
        state = state.proceed(CallAction())
        state = state.proceed(CheckAction())

        # First four of the eight bets/raises.
        for amount in (2, 4, 6, 8):
            state = state.proceed(RaiseAction(amount))
        show(make_infoset(state, 0, False))

        # Fifth raise.
        state = state.proceed(RaiseAction(10))
        show(make_infoset(state, 1, True))

        # Sixth and seventh raises.
        for amount in (12, 14):
            state = state.proceed(RaiseAction(amount))
        show(make_infoset(state, 1, True))

        # Eighth raise, then a call.
        state = state.proceed(RaiseAction(16))
        state = state.proceed(CallAction())

        final_view = make_infoset(state, 0, False)
        expected = torch.Tensor([
            1, 2, 1, 0, 0, 0,
            2, 4, 14, 12, 0, 0,
            0, 0, 0, 0, 0, 0,
        ])
        matches = (final_view.bet_history_vec == expected).all()
        self.assertTrue(matches)

        show(final_view)
Exemplo n.º 3
0
    def get_strategy(self, infoset, valid_mask):
        """
        Return a regret-matched probability vector for the infoset's bucket.

        The infoset is reduced to its small-bucket string, which keys the
        accumulated regrets in ``self._regrets``. A bucket seen for the first
        time is registered with zero regret for every action (so this lookup
        can add an entry to ``self._regrets``).

        Args:
            infoset: the information set to look up; passed to bucket_small.
            valid_mask: not used by this method.
                NOTE(review): invalid actions are therefore not masked out
                here -- confirm the caller applies the mask itself.

        Returns:
            A tensor of length Constants.NUM_ACTIONS: the positive part of
            the regrets divided by its sum, or the uniform distribution when
            that sum is below 1e-3.
        """
        key = bucket_small_join(bucket_small(infoset))

        # First visit to this bucket: start from zero regret everywhere.
        if key not in self._regrets:
            self._regrets[key] = torch.zeros(Constants.NUM_ACTIONS)

        # Only positive regret contributes to the strategy.
        with torch.no_grad():
            positive = torch.clamp(self._regrets[key], min=0)

        mass = positive.sum()
        if mass < 1e-3:
            # (Almost) no positive regret: fall back to a uniform strategy.
            return torch.ones(Constants.NUM_ACTIONS) / Constants.NUM_ACTIONS
        return positive / mass
Exemplo n.º 4
0
    def add_regret(self, infoset, r):
        """
        Accumulate an instantaneous regret into the bucket's total regret.

        The infoset is reduced to its small-bucket string, which keys
        ``self._regrets``. A bucket seen for the first time starts from zero
        regret. The stored total is floored at zero after every update
        (CFR+ regret matching, https://arxiv.org/pdf/1407.5042.pdf).

        Args:
            infoset: the information set the regret belongs to.
            r: instantaneous regret, one entry per action; its length must
                equal Constants.NUM_ACTIONS (checked with an assert).
        """
        num_actions = Constants.NUM_ACTIONS
        assert len(r) == num_actions

        key = bucket_small_join(bucket_small(infoset))

        # First visit to this bucket: start from zero regret everywhere.
        if key not in self._regrets:
            self._regrets[key] = torch.zeros(num_actions)

        # CFR+: negative totals are reset to zero instead of being carried.
        floor = torch.zeros(num_actions)
        updated = self._regrets[key] + r
        self._regrets[key] = torch.max(floor, updated)
Exemplo n.º 5
0
    def test_bucket_small_03(self):
        """Replay a long hand and print the small bucket along the way.

        Nothing is asserted; the joined bucket strings are printed for manual
        inspection. The random module is seeded with 123 before the round is
        created, and index 0 is the small blind. Example of a stored entry
        noted by the original author:

            BB.T.H2|x.R.R.x|R.R.x.x|2P.x.x.x':
                tensor([168., 160., 142., 181., 152., 144.])
        """
        def show(view):
            # Bucket the infoset and print its joined string form.
            print(bucket_small_join(bucket_small(view)))

        random.seed(123)
        small_blind = 0
        state = create_new_round(small_blind)

        # --- Preflop ---
        state = state.proceed(RaiseAction(4))  # SB raises.
        show(make_infoset(state, 1, False))

        state = state.proceed(RaiseAction(8))  # BB raises.
        view = make_infoset(state, 0, True)
        print("SB raises, BB raises")
        show(view)

        state = state.proceed(RaiseAction(12))  # SB raises.
        view = make_infoset(state, 1, False)
        print("SB raises, BB raises, SB raises")
        show(view)

        state = state.proceed(RaiseAction(16))  # BB raises.
        state = state.proceed(RaiseAction(20))  # SB raises.
        print("SB raise, BB raise, SB raise, BB raise, SB raise")
        show(make_infoset(state, 1, False))

        # --- Flop ---
        state = state.proceed(CallAction())  # BB calls.
        view = make_infoset(state, 1, False)
        print("flop, no actions yet")
        show(view)

        state = state.proceed(CheckAction())  # BB checks.
        print("flop, BB checked")
        show(make_infoset(state, 0, True))

        # --- Turn ---
        state = state.proceed(CheckAction())  # SB checks.
        print("SB checked, now on turn")
        show(make_infoset(state, 1, False))

        # --- River ---
        state = state.proceed(RaiseAction(10))  # BB raises.
        state = state.proceed(CallAction())  # SB calls.
        print("turn, BB raised, SB called, now on river")
        show(make_infoset(state, 1, False))

        state = state.proceed(CheckAction())  # BB checks.
        print("river, BB checks")
        show(make_infoset(state, 0, True))

        state = state.proceed(RaiseAction(10))  # SB raises.
        print("river, BB checks, SB raises")
        show(make_infoset(state, 1, False))

        state = state.proceed(RaiseAction(40))  # BB raises.
        print("river, BB checks, SB raises, BB raises")
        # NOTE(review): the other steps build the infoset for the player who
        # acts next; here BB has just raised yet the infoset is still built
        # for index 1 -- confirm this is intended.
        show(make_infoset(state, 1, False))

        state = state.proceed(RaiseAction(60))  # SB raises.
        state = state.proceed(RaiseAction(100))  # BB raises.
        print("river, BB check, SB raise, BB raise, SB raise, BB raise")
        show(make_infoset(state, 0, True))

        state = state.proceed(RaiseAction(120))  # SB raises.
        show(make_infoset(state, 1, False))