def save_xfp():
    # merge both players' average-policy tables into a single {info_state: probabilities} dict
    xfp_policy = xfp_solver.average_policy_tables()
    policy_keys = np.concatenate(
        (list(xfp_policy[0].keys()), list(xfp_policy[1].keys())), 0)
    policy_values = np.concatenate(
        (list(map(lambda d: list(d.values()), list(xfp_policy[0].values()))),
         list(map(lambda d: list(d.values()), list(xfp_policy[1].values())))), 0)
    # change possible None's into 0 (list comprehensions, not generator expressions,
    # so the values survive being zipped into a dict and saved)
    policy_values = [[d if d else 0 for d in a] for a in policy_values]
    xfp_policy = dict(zip(policy_keys, policy_values))
    policy_handler.save_to_tabular_policy(
        game, xfp_policy, "policies/XFP/{}/{}".format(save_prefix, it))
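
# Illustrative sketch only: an XFP training loop of the kind assumed to drive save_xfp()
# above. The solver API (fictitious_play.XFPSolver, iteration(), average_policy_tables())
# is OpenSpiel's; the game, the save cadence and the way `xfp_solver`, `save_prefix` and
# `it` become visible to save_xfp() are assumptions, not the original training script.
def _example_xfp_training_loop(game_name="kuhn_poker", iterations=1000, save_every=100):
    import pyspiel
    from open_spiel.python.algorithms import fictitious_play

    game = pyspiel.load_game(game_name)
    xfp_solver = fictitious_play.XFPSolver(game)
    for it in range(1, iterations + 1):
        xfp_solver.iteration()
        if it % save_every == 0:
            # average_policy_tables() returns one {info_state: {action: prob}} dict per
            # player; save_xfp() above flattens these into a single tabular policy
            _ = xfp_solver.average_policy_tables()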
def save_deepcfr():
    # log training diagnostics for this iteration
    print("---------iteration " + str(it) + "----------")
    for player, losses in six.iteritems(advantage_losses):
        print("Advantage for player ", player, losses)
        print("Advantage Buffer Size for player", player,
              len(deep_cfr_solver.advantage_buffers[player]))
    print("Strategy Buffer Size: ", len(deep_cfr_solver.strategy_buffer))
    print("policy loss: ", policy_loss)
    # convert the solver's callable policy into a tabular policy and save it
    tabular_policy = tabular_policy_from_callable(
        game, deep_cfr_solver.action_probabilities)
    policy = dict(zip(tabular_policy.state_lookup,
                      tabular_policy.action_probability_array))
    # save under map (save_prefix)_(num_travers)
    return policy_handler.save_to_tabular_policy(
        game, policy,
        "policies/DEEPCFR/{}/{}".format(save_prefix + "_" + str(num_travers), it))
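
# Illustrative sketch only: shows where the `advantage_losses` and `policy_loss` read by
# save_deepcfr() above are assumed to come from. The DeepCFRSolver constructor and the
# solve() return value (policy_network, advantage_losses, policy_loss) follow OpenSpiel's
# TF1-style deep_cfr module; the hyperparameters and game below are placeholder assumptions.
def _example_deep_cfr_run(game_name="kuhn_poker"):
    import pyspiel
    import tensorflow.compat.v1 as tf
    from open_spiel.python.algorithms import deep_cfr

    game = pyspiel.load_game(game_name)
    with tf.Session() as sess:
        deep_cfr_solver = deep_cfr.DeepCFRSolver(
            sess,
            game,
            policy_network_layers=(16,),
            advantage_network_layers=(16,),
            num_iterations=100,
            num_traversals=40,
            learning_rate=1e-3,
            batch_size_advantage=None,
            batch_size_strategy=None,
            memory_capacity=int(1e7))
        sess.run(tf.global_variables_initializer())
        # solve() returns (policy_network, advantage_losses, policy_loss); save_deepcfr()
        # above reads the last two from its enclosing scope
        _, advantage_losses, policy_loss = deep_cfr_solver.solve()
        return advantage_losses, policy_loss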
def save_cfrplus():
    # extract the CFR+ solver's average policy as a tabular dict and save it
    avg_policy = cfr_solver.average_policy()
    avg_policy = dict(zip(avg_policy.state_lookup, avg_policy.action_probability_array))
    policy_handler.save_to_tabular_policy(
        game, avg_policy, "policies/CFRPlus/{}/{}".format(save_prefix, it))
def save_cfr_br():
    # extract the CFR-BR solver's average policy as a tabular dict and save it
    policy = cfr_solver.average_policy()
    policy = dict(zip(policy.state_lookup, policy.action_probability_array))
    policy_handler.save_to_tabular_policy(
        game, policy, "policies/CFRBR/{}/{}".format(save_prefix, it))
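
# Illustrative sketch only: the tabular-CFR loop assumed to drive save_cfrplus() and
# save_cfr_br() above. The solver classes (cfr.CFRPlusSolver, cfr_br.CFRBRSolver) and their
# evaluate_and_update_policy() / average_policy() methods are OpenSpiel's; the game, the
# save cadence and how `cfr_solver`, `save_prefix` and `it` reach the save helpers are
# assumptions about the surrounding script.
def _example_cfr_training_loop(game_name="kuhn_poker", iterations=1000, save_every=100,
                               use_cfr_br=False):
    import pyspiel
    from open_spiel.python.algorithms import cfr, cfr_br

    game = pyspiel.load_game(game_name)
    cfr_solver = cfr_br.CFRBRSolver(game) if use_cfr_br else cfr.CFRPlusSolver(game)
    for it in range(1, iterations + 1):
        cfr_solver.evaluate_and_update_policy()
        if it % save_every == 0:
            # average_policy() returns a TabularPolicy; the save helpers above zip its
            # state_lookup and action_probability_array into a plain dict before saving
            avg_policy = cfr_solver.average_policy()
            _ = dict(zip(avg_policy.state_lookup, avg_policy.action_probability_array))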