Example #1
    def update_strategy_search(self,
                               traverser,
                               state,
                               node_map,
                               action_map,
                               continuation,
                               leaf=False):
        """Search-time strategy update: sample one of the traverser's actions
        from regret matching, count it in strategy_sum, and recurse; at a
        round boundary the update targets a four-option continuation node."""
        if state.terminal:
            return

        turn = state.turn
        info_set = state.info_set()

        if info_set not in action_map[turn]:
            action_map[turn][info_set] = {'actions': state.valid_actions()}

        valid_actions = action_map[turn][info_set]['actions']

        if leaf:
            if info_set not in continuation[turn]:
                continuation[turn][info_set] = Node(list(range(4)))

            node = continuation[turn][info_set]
        else:
            if info_set not in node_map[turn]:
                node_map[turn][info_set] = Node(valid_actions)

            node = node_map[turn][info_set]

        strategy = node.strategy()

        if turn == traverser:
            actions = list(strategy.keys())
            probs = list(strategy.values())
            random_action = actions[np.random.choice(len(actions), p=probs)]
            node.strategy_sum[random_action] += 1
            new_state = state.take(random_action, deep=True)

            if not leaf:
                self.update_strategy_search(
                    traverser,
                    new_state,
                    node_map,
                    action_map,
                    continuation,
                    leaf=new_state.round != state.round)

        else:
            if not leaf:
                for action in valid_actions:
                    new_state = state.take(action, deep=True)
                    self.update_strategy_search(
                        traverser,
                        new_state,
                        node_map,
                        action_map,
                        continuation,
                        leaf=new_state.round != state.round)
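
This is the search-time counterpart of update_strategy in Example #5; the only difference is the leaf handling, where an action that enters a new betting round is recorded against a four-option continuation node rather than a concrete-action node.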
Example #2
def test_init():
    actions = ['F', 'C', 'R']
    node = Node(actions)

    strat = node.strategy()

    assert sum(strat.values()) == 1, node
    assert sum(node.strategy_sum.values()) == 1, node
    assert sum(node.regret_sum.values()) == 0, node
Example #3
def test_weighting():
    actions = ['F', 'C', 'R']
    node = Node(actions)

    strat = node.strategy(.5)

    assert sum(strat.values()) == 1, node
    assert sum(node.strategy_sum.values()) == .5, node.strategy_sum
    assert sum(node.regret_sum.values()) == 0, node
Example #4
def test_regrets():
    actions = ['F', 'C', 'R']
    node = Node(actions)

    node.regret_sum = {'F': .5, 'C': .5, 'R': 0}

    strat = node.strategy()

    assert strat == {'F': .5, 'C': .5, 'R': 0}, strat
    assert sum(node.strategy_sum.values()) == 1, node.strategy_sum
Example #5
def update_strategy(traverser, state, node_map, action_map):
    """Update the traverser's average-strategy numerator: sample the
    traverser's own action from regret matching, enumerate everyone else's."""
    if state.terminal:
        return

    turn = state.turn
    info_set = state.info_set()

    if info_set not in action_map[turn]:
        action_map[turn][info_set] = {'actions': state.valid_actions()}

    valid_actions = action_map[turn][info_set]['actions']

    if info_set not in node_map[turn]:
        node_map[turn][info_set] = Node(valid_actions)

    node = node_map[turn][info_set]
    strategy = node.strategy()

    if turn == traverser:
        actions = list(strategy.keys())
        probs = list(strategy.values())
        random_action = actions[np.random.choice(len(actions), p=probs)]
        node.strategy_sum[random_action] += 1
        new_state = state.take(random_action, deep=True)

        update_strategy(traverser, new_state, node_map, action_map)

    else:
        for action in valid_actions:
            new_state = state.take(action, deep=True)
            update_strategy(traverser, new_state, node_map, action_map)
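
For context, a driver in the style of MCCFR would call this together with accumulate_regrets (Example #8) once per traverser on each iteration. A minimal sketch, assuming the Card/State/kuhn_eval interfaces shown in Example #7; the train function and its schedule are illustrative, not the project's actual entry point:

import random

def train(iterations, num_players=2):
    node_map = {i: {} for i in range(num_players)}
    action_map = {i: {} for i in range(num_players)}
    cards = [Card(14, 1), Card(13, 1), Card(12, 1)]

    for _ in range(iterations):
        for traverser in range(num_players):
            random.shuffle(cards)
            # Regret step: accumulate regrets for the traverser.
            accumulate_regrets(traverser,
                               State(cards, num_players, 1, kuhn_eval),
                               node_map, action_map)
            # Strategy step: accumulate the traverser's average strategy.
            update_strategy(traverser,
                            State(cards, num_players, 1, kuhn_eval),
                            node_map, action_map)

    return node_map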
Example #6
def test_average():
    actions = ['F', 'C', 'R']
    node = Node(actions)

    avg = node.avg_strategy()
    assert sum(avg.values()) == 1, avg

    strat = node.strategy(.5)

    assert sum(strat.values()) == 1, node
    assert sum(node.strategy_sum.values()) == .5, node.strategy_sum
    assert sum(node.regret_sum.values()) == 0, node

    avg = node.avg_strategy()

    assert sum(avg.values()) == 1, avg
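
Examples #2, #3, #4, and #6 pin down the Node interface: strategy() does regret matching and folds an optional weight into strategy_sum, while avg_strategy() normalizes strategy_sum. A minimal sketch that satisfies those tests, assuming standard regret matching (a reconstruction, not necessarily the project's actual class):

class Node:
    def __init__(self, actions):
        self.actions = list(actions)
        self.regret_sum = {a: 0 for a in self.actions}
        self.strategy_sum = {a: 0 for a in self.actions}

    def strategy(self, weight=1):
        # Regret matching: play proportionally to positive regret,
        # uniform when no action has positive regret.
        positives = {a: max(r, 0) for a, r in self.regret_sum.items()}
        total = sum(positives.values())
        if total > 0:
            strat = {a: p / total for a, p in positives.items()}
        else:
            strat = {a: 1 / len(self.actions) for a in self.actions}
        # Fold the (reach-)weighted strategy into the average numerator.
        for a, p in strat.items():
            self.strategy_sum[a] += weight * p
        return strat

    def avg_strategy(self):
        # Normalized strategy_sum; uniform before any accumulation.
        total = sum(self.strategy_sum.values())
        if total > 0:
            return {a: s / total for a, s in self.strategy_sum.items()}
        return {a: 1 / len(self.actions) for a in self.actions}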
Example #7
def test_update_strategy():
    num_players = 2
    node_map = {i: {} for i in range(num_players)}
    action_map = {i: {} for i in range(num_players)}
    n1 = Node(['F', 'C', '1R'])
    n1.regret_sum = {'F': 0, 'C': 1, '1R': 0}

    n2 = Node(['F', 'C', '1R'])
    n2.regret_sum = {'F': 1, 'C': 0, '1R': 1}

    node_map[0]['As || [[]]'] = n1
    node_map[0]["As || [['C', '1R']]"] = n2
    cards = [Card(14, 1), Card(13, 1), Card(12, 1)]
    state = State(cards, num_players, 1, kuhn_eval)

    update_strategy(0, state, node_map, action_map)

    assert sum(n1.strategy_sum.values()) > 0, f'Util\n{n1}, \nNodes\n{node_map}, Actions\n{json.dumps(action_map, indent=4)}'
    assert sum(n2.strategy_sum.values()) > 0, f'Util\n{n2}, \nNodes\n{node_map}, Actions\n{json.dumps(action_map, indent=4)}'
Example #8
def accumulate_regrets(traverser, state, node_map, action_map, prune=False):
    """External-sampling MCCFR: enumerate the traverser's actions to update
    regret_sum, sample each opponent's action, and optionally prune actions
    whose accumulated regret sits below REGRET_MIN."""
    if state.terminal:
        util = state.utility()
        return util

    turn = state.turn
    info_set = state.info_set()

    if info_set not in action_map[turn]:
        action_map[turn][info_set] = {'actions': state.valid_actions()}

    valid_actions = action_map[turn][info_set]['actions']

    if info_set not in node_map[turn]:
        node_map[turn][info_set] = Node(valid_actions)

    node = node_map[turn][info_set]
    strategy = node.strategy()

    if turn == traverser:
        util = {a: 0 for a in valid_actions}
        node_util = np.zeros(len(node_map))
        explored = set(valid_actions)

        for action in valid_actions:
            if prune and node.regret_sum[action] <= REGRET_MIN:
                explored.remove(action)
            else:
                new_state = state.take(action, deep=True)
                returned = accumulate_regrets(traverser,
                                              new_state,
                                              node_map,
                                              action_map,
                                              prune=prune)

                util[action] = returned[turn]
                node_util += returned * strategy[action]

        for action in explored:
            regret = util[action] - node_util[turn]
            node.regret_sum[action] += regret

        return node_util

    else:
        actions = list(strategy.keys())
        probs = list(strategy.values())
        random_action = actions[np.random.choice(len(actions), p=probs)]
        new_state = state.take(random_action, deep=True)
        return accumulate_regrets(traverser,
                                  new_state,
                                  node_map,
                                  action_map,
                                  prune=prune)
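
Two contrasts with the vanilla walk in Example #9 below: opponents' actions are sampled from their own strategies, so the opponents' reach weighting is implicit and regrets are added unweighted; and pruning skips any action whose accumulated regret has fallen below the module-level constant REGRET_MIN. A plausible definition, assuming the large negative threshold used in the Pluribus pseudocode (the exact value is an assumption):

REGRET_MIN = -300_000_000  # skip actions whose regret_sum falls below this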
Example #9
File: vanilla.py Project: Tom2D/pluribus
def accumulate_regrets(state, node_map, action_map, probs):
    """Vanilla CFR: walk every action for every player, threading each
    player's reach probability through probs."""
    if state.terminal:
        util = state.utility()
        return util

    info_set = state.info_set()

    if info_set not in action_map[state.turn]:
        action_map[state.turn][info_set] = state.valid_actions()

    valid_actions = action_map[state.turn][info_set]

    if info_set not in node_map[state.turn]:
        node_map[state.turn][info_set] = Node(valid_actions)

    node = node_map[state.turn][info_set]

    strategy = node.strategy(probs[state.turn])

    util = {a: 0 for a in valid_actions}
    node_util = np.zeros(len(node_map))
    for action in valid_actions:
        new_prob = [
            p if i != state.turn else p * strategy[action]
            for i, p in enumerate(probs)
        ]
        new_state = state.take(action, deep=True)
        returned = accumulate_regrets(new_state, node_map, action_map,
                                      new_prob)

        util[action] = returned[state.turn]
        node_util += returned * strategy[action]

    reach_prob = 1
    for p, prob in enumerate(probs):
        if p != state.turn:
            reach_prob *= prob

    for action in valid_actions:
        regret = util[action] - node_util[state.turn]
        node.regret_sum[action] += regret * reach_prob

    return node_util
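
In CFR notation, the two accumulations above are the standard updates, where node.strategy(probs[state.turn]) supplies the player's own reach \pi_i and reach_prob is the opponents' reach \pi_{-i}:

S^{T}(I,a) \mathrel{+}= \pi_{i}^{\sigma}(I)\,\sigma(I,a),
\qquad
R^{T}(I,a) \mathrel{+}= \pi_{-i}^{\sigma}(I)\,\bigl(u_i(\sigma_{I \to a}) - u_i(\sigma)\bigr)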
Example #10
    def pluribus_turn(self, state, blueprint, action_map, cards):
        """Play one of Pluribus's live turns by sampling from the blueprint's
        average strategy, recording the sampled action under 'frozen'."""
        info_set = state.info_set()
        turn = state.turn
        if info_set not in action_map[turn]:
            action_map[turn][info_set] = {'actions': state.valid_actions()}

        valid_actions = action_map[turn][info_set]['actions']
        if info_set not in blueprint[turn]:
            blueprint[turn][info_set] = Node(valid_actions)

        node = blueprint[turn][info_set]
        strategy = node.avg_strategy()

        actions = list(strategy.keys())
        probs = list(strategy.values())

        sampled = actions[np.random.choice(len(actions), p=probs)]
        print(f"Pluribus played {sampled}")

        action_map[turn][info_set]['frozen'] = sampled

        state.take(sampled)

        self.check_round(state, self.root, blueprint, action_map, cards)
Example #11
    def accumulate_regrets_search(self,
                                  traverser,
                                  state,
                                  node_map,
                                  action_map,
                                  continuations,
                                  prune=False,
                                  leaf=False):
        """Search-time regret accumulation: like accumulate_regrets above, but
        when play crosses into a new round the node holds continuation
        strategies and utilities come from self.rollout."""
        if state.terminal:
            util = state.utility()
            return util

        turn = state.turn
        info_set = state.info_set()

        if info_set not in action_map[turn]:
            action_map[turn][info_set] = {'actions': state.valid_actions()}

        valid_actions = action_map[turn][info_set]['actions']
        if 'fixed' in action_map[turn][info_set]:
            # An action fixed during live play overrides the searchable set.
            valid_actions = [action_map[turn][info_set]['fixed']]

        if leaf:
            if info_set not in continuations[turn]:
                continuations[turn][info_set] = Node(["NULL", "F", "C", "4R"])

            node = continuations[turn][info_set]
            valid_actions = ["NULL", "F", "C", "4R"]
        else:
            if info_set not in node_map[turn]:
                node_map[turn][info_set] = Node(valid_actions)

            node = node_map[turn][info_set]

        strategy = node.strategy()

        if turn == traverser:
            util = {a: 0 for a in valid_actions}
            node_util = np.zeros(len(node_map))
            explored = set(valid_actions)

            for action in valid_actions:
                if prune and not leaf and node.regret_sum[action] <= REGRET_MIN:
                    explored.remove(action)
                else:
                    if leaf:
                        returned = self.rollout(traverser, state, action)
                    else:
                        new_state = state.take(action, deep=True)
                        returned = self.accumulate_regrets_search(
                            traverser,
                            new_state,
                            node_map,
                            action_map,
                            continuations,
                            prune=prune,
                            leaf=new_state.round != state.round)
                    util[action] = returned[turn]
                    node_util += returned * strategy[action]

            for action in explored:
                regret = util[action] - node_util[turn]
                node.regret_sum[action] += regret

            return node_util

        else:
            if leaf:
                return self.rollout(traverser, state, "NULL")

            actions = list(strategy.keys())
            probs = list(strategy.values())
            random_action = actions[np.random.choice(len(actions), p=probs)]
            new_state = state.take(random_action, deep=True)
            return self.accumulate_regrets_search(
                traverser,
                new_state,
                node_map,
                action_map,
                continuations,
                prune=prune,
                leaf=new_state.round != state.round)
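
The continuation actions at leaf infosets ("NULL", "F", "C", "4R") appear to mirror Pluribus's depth-limited search, in which players at a leaf choose among a few continuation strategies instead of concrete actions; "NULL" serves as the default rollout policy, and it is also what non-traversing players receive.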