Example #1
def value_func_generator():
    f = ApproxActionValueFunction(value_func.delegate.handicappers)
    f.setup()
    return f


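# callback: periodically (every reset_interval iterations) replace the opponents'
# value functions with fresh ones built by value_func_generator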
reset_opponent_value_func = ResetOpponentValueFunction(save_dir_path,
                                                       reset_interval,
                                                       value_func_generator)
callbacks.append(reset_opponent_value_func)

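# callback: record the estimated value of the initial state to a CSV so its
# transition can be tracked over the course of training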
score_output_path = os.path.join(OUTPUT_DIR, "initial_value_transition.csv")
initial_value_scorer = InitialStateValueRecorder(score_output_path)
callbacks.append(initial_value_scorer)

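# callback: write a sampled episode to a log file every episode_sample_interval iterations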
episode_log_path = os.path.join(OUTPUT_DIR, "episode_log.txt")
episode_sample_interval = 1000
episode_sampler = EpisodeSampler(episode_sample_interval, episode_log_path,
                                 my_uuid)
callbacks.append(episode_sampler)

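# callback: dump an analysis of the current weights every weights_sample_interval iterations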
weights_output_path = os.path.join(OUTPUT_DIR, "weights_analysis.txt")
weights_sample_interval = 1000
weights_analyzer = WeightsAnalyzer(weights_sample_interval,
                                   weights_output_path)
callbacks.append(weights_analyzer)

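# break into the debugger before kicking off training, then run GPI through run_insecure_method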
import pdb
pdb.set_trace()
run_insecure_method(algorithm.run_gpi, (TEST_LENGTH, callbacks))
Example #2
def value_func_generator():
    f = ApproxActionValueFunction(value_func.delegate.handicappers)
    f.setup()
    return f


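# same opponent-reset callback as Example #1, here with reset_policy="random"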
reset_opponent_value_func = ResetOpponentValueFunction(save_dir_path,
                                                       reset_interval,
                                                       value_func_generator,
                                                       reset_policy="random")
callbacks.append(reset_opponent_value_func)

score_output_path = os.path.join(OUTPUT_DIR, "initial_value_transition.csv")
initial_value_scorer = InitialStateValueRecorder(score_output_path)
callbacks.append(initial_value_scorer)

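# sample episodes less often here (every 50000 iterations) and include the
# feature weights in the log (show_weights=True)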
episode_log_path = os.path.join(OUTPUT_DIR, "episode_log.txt")
episode_sample_interval = 50000
episode_sampler = EpisodeSampler(episode_sample_interval,
                                 episode_log_path,
                                 my_uuid,
                                 show_weights=True)
callbacks.append(episode_sampler)

weights_output_path = os.path.join(OUTPUT_DIR, "weights_analysis.txt")
weights_sample_interval = 50000
weights_analyzer = WeightsAnalyzer(weights_sample_interval,
                                   weights_output_path)
callbacks.append(weights_analyzer)

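# run generalized policy iteration with the registered callbacks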
algorithm.run_gpi(TEST_LENGTH, callbacks)
Example #3
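# load the pre-trained opponent value functions from disk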
for path in opponent_func_load_paths:
    value_func = VALUE_FUNC_CLASS(NB_UNIT, blind_structure, handicappers)
    value_func.setup()
    value_func.load(path)
    opponent_value_funcs.append(value_func)

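# set up the Texas Hold'em task with the loaded opponents and play greedily
# with respect to the learned value function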
task = TexasHoldemTask(final_round=POKER_ROUND,
                       scale_reward=True,
                       lose_penalty=True)
task.set_opponent_value_functions(opponent_value_funcs)
greedy_policy = GreedyPolicy()

# generate episode
while True:
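    # the EpisodeSampler instances are used only for their _visualize_action_log
    # helper, so the constructor arguments are dummy placeholders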
    quiet_helper = EpisodeSampler("dummy",
                                  "dummy",
                                  "dummy",
                                  show_weights=False)
    loud_helper = EpisodeSampler("dummy", "dummy", "dummy", show_weights=True)
    episode = generate_episode(task, greedy_policy, agent_value_func)
    print "final reward = %s, episode_length=%d." % (episode[-1][3],
                                                     len(episode))

    if "y" == raw_input(">> Do you see this episode in detail? (y/n)"):
        for experience in episode:
            print quiet_helper._visualize_action_log(task, agent_value_func,
                                                     experience)
            if "y" == raw_input("do you want to see weights? (y/n)"):
                print loud_helper._visualize_action_log(
                    task, agent_value_func, experience)
            raw_input(">>> type something to go next...")