示例#1
0
def _report_exploitability(game, strategy, label):
    """Print the exploitability of ``strategy`` after completing it uniformly.

    ``game`` is the wrapper object passed to ``cfr``; the underlying game is
    read from ``game.game``.  ``label`` is the word that prefixes the printed
    line (e.g. ``"Avg"`` or ``"Current"``).
    """
    complete_strategy = game.game.complete_strategy_uniformly(strategy)
    exploitability = best_response.compute_exploitability(game.game, complete_strategy)
    print("{} strategy exploitability: {:.4f}".format(label, exploitability))


def cfr(game, num_iters=10000, info_iters=100):
    """Run the CFR loop and return the average strategy.

    Each iteration calls ``cfr_recursive`` from ``game.game.root`` once for
    player 1 and once for player 2, then recomputes the average strategy from
    the accumulated action counts.

    Args:
        game: Wrapper exposing the underlying game as ``game.game`` (which
            must provide ``root`` and ``complete_strategy_uniformly``).
        num_iters: Maximum number of iterations to run.
        info_iters: Every ``info_iters`` iterations the average strategy is
            compared against the snapshot taken ``info_iters`` iterations
            earlier; progress is printed and convergence is tested.

    Returns:
        The average strategy produced by ``compute_average_strategy``.  The
        function returns early once the distance between two consecutive
        snapshots falls below ``1e-5``; otherwise it returns after
        ``num_iters`` iterations.
    """
    # regrets is a dictionary where the keys are the information sets and
    # values are dictionaries from actions available in that information set
    # to the counterfactual regret for not playing that action in that
    # information set.  Since information sets encode the player, we only
    # require one dictionary.
    regrets = dict()

    # Similarly, action_counts is a dictionary with keys the information sets
    # and values dictionaries from actions to action counts.
    action_counts = dict()

    # strategy_t holds the strategy at time t; similarly strategy_t_1 holds
    # the strategy at time t + 1.
    strategy_t = dict()
    strategy_t_1 = dict()

    average_strategy = None
    average_strategy_snapshot = None

    # Each information set is uniquely identified with an action tuple.
    for t in range(num_iters):
        for i in [1, 2]:
            cfr_recursive(game, game.game.root, i, t, 1.0, 1.0, regrets,
                          action_counts, strategy_t, strategy_t_1)

        if (t % info_iters == 0) and (average_strategy is not None):
            print("t: {}".format(t))
            if average_strategy_snapshot is not None:
                snapshot_distance = compare_strategies(average_strategy, average_strategy_snapshot)
                # The snapshot is info_iters iterations old, so report that
                # interval instead of a hard-coded 100.
                print("Distance between strategies (t - {}): {:.10f}".format(
                    info_iters, snapshot_distance))

                # If the snapshot distance is small enough, then return the
                # average strategy.  This means the average strategy has
                # barely moved over the last info_iters iterations, which is
                # hopefully sufficient for convergence.
                if snapshot_distance < 1e-5:
                    _report_exploitability(game, average_strategy, "Avg")
                    return average_strategy

            average_strategy_snapshot = average_strategy.copy()
        average_strategy = compute_average_strategy(action_counts)

        # Update strategy_t to equal strategy_t_1. We update strategy_t_1
        # inside cfr_recursive.  We take a copy because we update it inside
        # cfr_recursive, and want to hold on to strategy_t_1 separately to
        # compare.
        strategy_t = strategy_t_1.copy()

        if t % 1000 == 0:
            # We also compute the best response to the current strategy.
            _report_exploitability(game, strategy_t, "Current")

    _report_exploitability(game, average_strategy, "Avg")
    return average_strategy
示例#2
0
# coding: utf-8
import numpy as np
from Dynamic_calculation_of_children import LeducGame
from cfr import cfr
from cfr_game import CFRGame
from example_strategy import constant_action, random_strategy, uniformly_random_strategy
from best_response import best_response, compute_exploitability

if __name__ == "__main__":
    # Build the game through LeducGame, the only Leduc class imported by this
    # script; the previously used name `LeducNode` was never imported and
    # raised NameError on the first statement.
    game = LeducGame.create_game(3)
    #game.print_tree(only_leaves=True)

    # For each baseline below, a constant-action strategy is built for player
    # 1, extended with the same constant action for player 2, and the
    # exploitability of the combined strategy is printed.

    # The strategy that always folds.
    strategy_folds = constant_action(game, 1, 0)
    strategy_folds.update(constant_action(game, 2, 0))
    exploitability_folds = compute_exploitability(game, strategy_folds)
    print("Exploitability of always folding: {}".format(exploitability_folds))

    # The strategy that always calls
    strategy_calls = constant_action(game, 1, 1)
    strategy_calls.update(constant_action(game, 2, 1))
    exploitability_calls = compute_exploitability(game, strategy_calls)
    print("Exploitability of always calling: {}".format(exploitability_calls))

    # The strategy that always raises.
    strategy_raises = constant_action(game, 1, 2)
    strategy_raises.update(constant_action(game, 2, 2))
    exploitability_raises = compute_exploitability(game, strategy_raises)
    print("Exploitability of always raising: {}".format(exploitability_raises))

    # A randomly chosen strategy