Example #1
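import os

# Factory that builds a fresh approximate action-value function; it is handed
# to the opponent-reset callback below.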
def value_func_generator():
    f = ApproxActionValueFunction(value_func.delegate.handicappers)
    f.setup()
    return f


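# Callback that periodically resets the opponents' value functions with fresh
# ones from the factory above.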
reset_opponent_value_func = ResetOpponentValueFunction(save_dir_path,
                                                       reset_interval,
                                                       value_func_generator)
callbacks.append(reset_opponent_value_func)

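# Record the estimated value of the initial state over training to a CSV.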
score_output_path = os.path.join(OUTPUT_DIR, "initial_value_transition.csv")
initial_value_scorer = InitialStateValueRecorder(score_output_path)
callbacks.append(initial_value_scorer)

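# Log a sample episode for our agent (my_uuid) every 1000 iterations.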
episode_log_path = os.path.join(OUTPUT_DIR, "episode_log.txt")
episode_sample_interval = 1000
episode_sampler = EpisodeSampler(episode_sample_interval, episode_log_path,
                                 my_uuid)
callbacks.append(episode_sampler)

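# Periodically dump the learned weights for inspection.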
weights_output_path = os.path.join(OUTPUT_DIR, "weights_analysis.txt")
weights_sample_interval = 1000
weights_analyzer = WeightsAnalyzer(weights_sample_interval,
                                   weights_output_path)
callbacks.append(weights_analyzer)

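# Drop into the debugger before kicking off the training run.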
import pdb
pdb.set_trace()
run_insecure_method(algorithm.run_gpi, (TEST_LENGTH, callbacks))
Example #2
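import os

# Factory that builds a fresh approximate action-value function for the
# opponent-reset callback.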
def value_func_generator():
    f = ApproxActionValueFunction(value_func.delegate.handicappers)
    f.setup()
    return f


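# Same periodic opponent reset as Example #1, here with reset_policy="random".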
reset_opponent_value_func = ResetOpponentValueFunction(save_dir_path,
                                                       reset_interval,
                                                       value_func_generator,
                                                       reset_policy="random")
callbacks.append(reset_opponent_value_func)

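# Track the initial state's estimated value as training progresses.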
score_output_path = os.path.join(OUTPUT_DIR, "initial_value_transition.csv")
initial_value_scorer = InitialStateValueRecorder(score_output_path)
callbacks.append(initial_value_scorer)

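# Log a sample episode every 50000 iterations; show_weights=True also logs
# the learned weights.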
episode_log_path = os.path.join(OUTPUT_DIR, "episode_log.txt")
episode_sample_interval = 50000
episode_sampler = EpisodeSampler(episode_sample_interval,
                                 episode_log_path,
                                 my_uuid,
                                 show_weights=True)
callbacks.append(episode_sampler)

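# Analyze the learned weights at the same 50000-iteration interval.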
weights_output_path = os.path.join(OUTPUT_DIR, "weights_analysis.txt")
weights_sample_interval = 50000
weights_analyzer = WeightsAnalyzer(weights_sample_interval,
                                   weights_output_path)
callbacks.append(weights_analyzer)

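# Run generalized policy iteration (GPI) for TEST_LENGTH iterations with all
# callbacks attached.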
algorithm.run_gpi(TEST_LENGTH, callbacks)
Example #3
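# Restore each pre-trained opponent value function from disk.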
for path in opponent_func_load_paths:
    value_func = VALUE_FUNC_CLASS(NB_UNIT, blind_structure, handicappers)
    value_func.setup()
    value_func.load(path)
    opponent_value_funcs.append(value_func)

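# Play Texas Hold'em against the loaded opponents, acting greedily.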
task = TexasHoldemTask(final_round=POKER_ROUND,
                       scale_reward=True,
                       lose_penalty=True)
task.set_opponent_value_functions(opponent_value_funcs)
greedy_policy = GreedyPolicy()

# The samplers are used only for their _visualize_action_log helper, so the
# constructor arguments are throwaway "dummy" placeholders; build them once
# outside the loop.
quiet_helper = EpisodeSampler("dummy", "dummy", "dummy", show_weights=False)
loud_helper = EpisodeSampler("dummy", "dummy", "dummy", show_weights=True)

# Generate and inspect episodes until interrupted.
while True:
    episode = generate_episode(task, greedy_policy, agent_value_func)
    print "final reward = %s, episode_length=%d." % (episode[-1][3],
                                                     len(episode))

    if "y" == raw_input(">> Do you see this episode in detail? (y/n)"):
        for experience in episode:
            print quiet_helper._visualize_action_log(task, agent_value_func,
                                                     experience)
            if "y" == raw_input("do you want to see weights? (y/n)"):
                print loud_helper._visualize_action_log(
                    task, agent_value_func, experience)
            raw_input(">>> type something to go next...")