def _report_exploitability(game, strategy, label):
    """Print the exploitability of ``strategy`` under the heading ``label``.

    The strategy is completed with ``game.game.complete_strategy_uniformly``
    and then scored with ``best_response.compute_exploitability``.
    """
    complete_strategy = game.game.complete_strategy_uniformly(strategy)
    exploitability = best_response.compute_exploitability(game.game, complete_strategy)
    print("{} strategy exploitability: {:.4f}".format(label, exploitability))


def cfr(game, num_iters=10000, info_iters=100):
    """Run CFR iterations on ``game`` and return the average strategy.

    Each iteration calls ``cfr_recursive`` from ``game.game.root`` once for
    player 1 and once for player 2, then recomputes the average strategy from
    the accumulated action counts.

    Args:
        game: Wrapper object exposing the underlying game as ``game.game``
            (its ``root`` and ``complete_strategy_uniformly`` are used here).
        num_iters: Maximum number of iterations to run.
        info_iters: Spacing, in iterations, between progress reports and
            average-strategy snapshots. Must be non-zero, since it is used as
            a modulus.

    Returns:
        The most recently computed average strategy. The function returns
        early when the distance between two consecutive snapshots drops below
        the convergence tolerance. If ``num_iters`` is 0 the loop never runs
        and ``None`` is returned.
    """
    # Snapshots closer than this (per compare_strategies) count as converged.
    convergence_tolerance = 1e-5
    # Spacing, in iterations, between exploitability reports.
    exploitability_interval = 1000

    # regrets is a dictionary where the keys are the information sets and
    # values are dictionaries from actions available in that information set
    # to the counterfactual regret for not playing that action in that
    # information set. Since information sets encode the player, we only
    # require one dictionary.
    regrets = {}

    # Similarly, action_counts is a dictionary with keys the information sets
    # and values dictionaries from actions to action counts.
    action_counts = {}

    # strategy_t holds the strategy at time t; strategy_t_1 holds the
    # strategy at time t + 1.
    strategy_t = {}
    strategy_t_1 = {}

    average_strategy = None
    average_strategy_snapshot = None

    # Each information set is uniquely identified with an action tuple.
    for t in range(num_iters):
        for player in (1, 2):
            cfr_recursive(game, game.game.root, player, t, 1.0, 1.0,
                          regrets, action_counts, strategy_t, strategy_t_1)

        if t % info_iters == 0 and average_strategy is not None:
            print("t: {}".format(t))
            if average_strategy_snapshot is not None:
                snapshot_distance = compare_strategies(
                    average_strategy, average_strategy_snapshot)
                # Report the real snapshot spacing instead of a fixed "100";
                # the output is unchanged for the default info_iters.
                print("Distance between strategies (t - {}): {:.10f}".format(
                    info_iters, snapshot_distance))

                # If the average strategy has barely moved since the previous
                # snapshot, treat that as (hopefully) sufficient evidence of
                # convergence and stop early.
                if snapshot_distance < convergence_tolerance:
                    _report_exploitability(game, average_strategy, "Avg")
                    return average_strategy

            average_strategy_snapshot = average_strategy.copy()

        average_strategy = compute_average_strategy(action_counts)

        # Update strategy_t to equal strategy_t_1. strategy_t_1 is updated
        # inside cfr_recursive, so we take a copy in order to hold on to the
        # two strategies separately.
        # NOTE(review): dict.copy() is shallow, so the per-information-set
        # values are shared between the two dicts -- confirm cfr_recursive
        # replaces those values rather than mutating them in place.
        strategy_t = strategy_t_1.copy()

        if t % exploitability_interval == 0:
            # Also report the best response value against both the current
            # strategy and the average strategy.
            _report_exploitability(game, strategy_t, "Current")
            _report_exploitability(game, average_strategy, "Avg")

    return average_strategy
# coding: utf-8 import numpy as np from Dynamic_calculation_of_children import LeducGame from cfr import cfr from cfr_game import CFRGame from example_strategy import constant_action, random_strategy, uniformly_random_strategy from best_response import best_response, compute_exploitability if __name__ == "__main__": game = LeducNode.create_game(3) #game.print_tree(only_leaves=True) # The strategy that always folds. strategy_folds = constant_action(game, 1, 0) strategy_folds.update(constant_action(game, 2, 0)) exploitability_folds = compute_exploitability(game, strategy_folds) print("Exploitability of always folding: {}".format(exploitability_folds)) # The strategy that always calls strategy_calls = constant_action(game, 1, 1) strategy_calls.update(constant_action(game, 2, 1)) exploitability_calls = compute_exploitability(game, strategy_calls) print("Exploitability of always calling: {}".format(exploitability_calls)) # The strategy that always raises. strategy_raises = constant_action(game, 1, 2) strategy_raises.update(constant_action(game, 2, 2)) exploitability_raises = compute_exploitability(game, strategy_raises) print("Exploitability of always raising: {}".format(exploitability_raises)) # A randomly chosen strategy