示例#1
0
 def save_xfp():
     """Flatten the XFP average policy tables and save them as one policy.

     Reads ``xfp_solver``, ``policy_handler``, ``game``, ``save_prefix`` and
     ``it`` from the enclosing scope. The tables at index 0 and 1 of
     ``xfp_solver.average_policy_tables()`` are merged into a single dict that
     maps each table key to the list of that entry's values, with ``None``
     replaced by 0. The result is handed to
     ``policy_handler.save_to_tabular_policy`` under
     ``policies/XFP/<save_prefix>/<it>``.
     """
     tables = xfp_solver.average_policy_tables()
     xfp_policy = {}
     # Only the tables at index 0 and 1 are merged, exactly as before.
     for player_table in (tables[0], tables[1]):
         for info_state, action_probs in player_table.items():
             # Materialise a real list per entry. The previous code stored a
             # generator expression here, which can be consumed only once and
             # cannot be pickled. Plain lists also avoid forcing rows of
             # different lengths into a rectangular numpy array.
             xfp_policy[info_state] = [
                 0 if prob is None else prob for prob in action_probs.values()
             ]
     policy_handler.save_to_tabular_policy(game, xfp_policy, "policies/XFP/{}/{}".format(save_prefix, it))
示例#2
0
 def save_deepcfr():  # and print some info i guess?
     """Print Deep CFR progress information, then save the current policy.

     Reads ``it``, ``advantage_losses``, ``policy_loss``, ``deep_cfr_solver``,
     ``game``, ``save_prefix``, ``num_travers`` and ``policy_handler`` from the
     enclosing scope.

     Returns:
         Whatever ``policy_handler.save_to_tabular_policy`` returns.
     """
     print("".join(("---------iteration ", str(it), "----------")))
     for player_id, player_losses in six.iteritems(advantage_losses):
         print("Advantage for player ", player_id, player_losses)
         advantage_buffer = deep_cfr_solver.advantage_buffers[player_id]
         print("Advantage Buffer Size for player", player_id, len(advantage_buffer))
     strategy_buffer_size = len(deep_cfr_solver.strategy_buffer)
     print("Strategy Buffer Size: ", strategy_buffer_size)
     print("policy loss: ", policy_loss)

     first_pass = tabular_policy_from_callable(game, deep_cfr_solver.action_probabilities)
     # NOTE(review): the first conversion's result is passed through the same
     # converter again; this looks redundant -- confirm before removing.
     tabular = tabular_policy_from_callable(game, first_pass)
     policy = {
         state_key: probs
         for state_key, probs in zip(tabular.state_lookup, tabular.action_probability_array)
     }
     # save under map (save_prefix)_(num_travers)
     target = "policies/DEEPCFR/{}/{}".format(save_prefix + "_" + str(num_travers), it)
     return policy_handler.save_to_tabular_policy(game, policy, target)
示例#3
0
 def save_cfrplus():
     """Save the CFR solver's average policy under ``policies/CFRPlus``.

     Reads ``cfr_solver``, ``policy_handler``, ``game``, ``save_prefix`` and
     ``it`` from the enclosing scope. Each item yielded by iterating the
     average policy's ``state_lookup`` is paired with the entry at the same
     position in its ``action_probability_array``.
     """
     averaged = cfr_solver.average_policy()
     state_to_probs = {}
     # presumably state_lookup iterates in the same order as the array rows;
     # verify against the policy class
     for state_key, probs in zip(averaged.state_lookup, averaged.action_probability_array):
         state_to_probs[state_key] = probs
     destination = "policies/CFRPlus/{}/{}".format(save_prefix, it)
     policy_handler.save_to_tabular_policy(game, state_to_probs, destination)
示例#4
0
 def save_cfr_br():
     """Save the CFR solver's average policy under ``policies/CFRBR``.

     Reads ``cfr_solver``, ``policy_handler``, ``game``, ``save_prefix`` and
     ``it`` from the enclosing scope. The saved dict pairs what iterating the
     average policy's ``state_lookup`` yields with the positionally matching
     entries of its ``action_probability_array``.
     """
     average = cfr_solver.average_policy()
     paired = zip(average.state_lookup, average.action_probability_array)
     policy_by_state = {state_key: probs for state_key, probs in paired}
     out_path = "policies/CFRBR/{}/{}".format(save_prefix, it)
     policy_handler.save_to_tabular_policy(game, policy_by_state, out_path)