def value_func_generator():
    f = ApproxActionValueFunction(value_func.delegate.handicappers)
    f.setup()
    return f

# reset the opponent value functions every reset_interval iterations
reset_opponent_value_func = ResetOpponentValueFunction(save_dir_path, reset_interval, value_func_generator)
callbacks.append(reset_opponent_value_func)

# record the estimated value of the initial state to a CSV as training proceeds
score_output_path = os.path.join(OUTPUT_DIR, "initial_value_transition.csv")
initial_value_scorer = InitialStateValueRecorder(score_output_path)
callbacks.append(initial_value_scorer)

# sample and log an episode every 1000 iterations
episode_log_path = os.path.join(OUTPUT_DIR, "episode_log.txt")
episode_sample_interval = 1000
episode_sampler = EpisodeSampler(episode_sample_interval, episode_log_path, my_uuid)
callbacks.append(episode_sampler)

# dump the value function weights every 1000 iterations
weights_output_path = os.path.join(OUTPUT_DIR, "weights_analysis.txt")
weights_sample_interval = 1000
weights_analyzer = WeightsAnalyzer(weights_sample_interval, weights_output_path)
callbacks.append(weights_analyzer)

# drop into the debugger before kicking off training
import pdb
pdb.set_trace()
run_insecure_method(algorithm.run_gpi, (TEST_LENGTH, callbacks))
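# --------------------------------------------------------------------------
# Illustrative sketch only: this is NOT the ResetOpponentValueFunction
# implementation, just the self-play idea the callback above appears to serve:
# every reset_interval iterations, snapshot the learner, build fresh opponent
# value functions via value_func_generator, and load the snapshot into them so
# the agent keeps training against a recent copy of itself. The hook name
# after_update, the save() method, and the opponent count of 9 are assumptions
# made for illustration.
class SelfPlayResetSketch(object):

    def __init__(self, save_dir_path, reset_interval, value_func_generator):
        self.save_dir_path = save_dir_path
        self.reset_interval = reset_interval
        self.generate = value_func_generator

    def after_update(self, iteration_count, task, value_function):
        if iteration_count % self.reset_interval != 0:
            return
        value_function.save(self.save_dir_path)          # snapshot the current learner (assumed API)
        opponents = [self.generate() for _ in range(9)]  # 9 opponents is only an example
        for opponent in opponents:
            opponent.load(self.save_dir_path)            # load() matches the usage in the evaluation script below
        task.set_opponent_value_functions(opponents)     # same setter used with TexasHoldemTask below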
def value_func_generator():
    f = ApproxActionValueFunction(value_func.delegate.handicappers)
    f.setup()
    return f

# same callback setup as above, but with reset_policy="random" and much longer sampling intervals
reset_opponent_value_func = ResetOpponentValueFunction(save_dir_path, reset_interval, value_func_generator, reset_policy="random")
callbacks.append(reset_opponent_value_func)

score_output_path = os.path.join(OUTPUT_DIR, "initial_value_transition.csv")
initial_value_scorer = InitialStateValueRecorder(score_output_path)
callbacks.append(initial_value_scorer)

# sample an episode every 50000 iterations, including weight details
episode_log_path = os.path.join(OUTPUT_DIR, "episode_log.txt")
episode_sample_interval = 50000
episode_sampler = EpisodeSampler(episode_sample_interval, episode_log_path, my_uuid, show_weights=True)
callbacks.append(episode_sampler)

weights_output_path = os.path.join(OUTPUT_DIR, "weights_analysis.txt")
weights_sample_interval = 50000
weights_analyzer = WeightsAnalyzer(weights_sample_interval, weights_output_path)
callbacks.append(weights_analyzer)

algorithm.run_gpi(TEST_LENGTH, callbacks)
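# --------------------------------------------------------------------------
# Sketch of a custom periodic callback. The recorders registered above
# (InitialStateValueRecorder, EpisodeSampler, WeightsAnalyzer) all look like
# periodic observers driven by run_gpi; the after_update hook signature used
# here is an assumption about that callback interface, not a confirmed API.
import time

class IterationTimeLogger(object):

    def __init__(self, log_path, interval):
        self.log_path = log_path
        self.interval = interval
        self.last_time = time.time()

    def after_update(self, iteration_count, task, value_function):
        if iteration_count % self.interval != 0:
            return
        now = time.time()
        with open(self.log_path, "a") as f:
            f.write("iteration=%d, elapsed=%.1f sec\n" % (iteration_count, now - self.last_time))
        self.last_time = now

# e.g. callbacks.append(IterationTimeLogger(os.path.join(OUTPUT_DIR, "time_log.txt"), 1000))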
# load the opponents' value functions from the given snapshot files
for path in opponent_func_load_paths:
    value_func = VALUE_FUNC_CLASS(NB_UNIT, blind_structure, handicappers)
    value_func.setup()
    value_func.load(path)
    opponent_value_funcs.append(value_func)

task = TexasHoldemTask(final_round=POKER_ROUND, scale_reward=True, lose_penalty=True)
task.set_opponent_value_functions(opponent_value_funcs)
greedy_policy = GreedyPolicy()

# generate episodes and inspect them interactively
while True:
    quiet_helper = EpisodeSampler("dummy", "dummy", "dummy", show_weights=False)
    loud_helper = EpisodeSampler("dummy", "dummy", "dummy", show_weights=True)
    episode = generate_episode(task, greedy_policy, agent_value_func)
    print "final reward = %s, episode_length = %d." % (episode[-1][3], len(episode))
    if "y" == raw_input(">> Do you want to see this episode in detail? (y/n) "):
        for experience in episode:
            print quiet_helper._visualize_action_log(task, agent_value_func, experience)
            if "y" == raw_input(">> Do you want to see the weights? (y/n) "):
                print loud_helper._visualize_action_log(task, agent_value_func, experience)
            raw_input(">>> type something to go to the next experience...")
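# --------------------------------------------------------------------------
# Sketch of what GreedyPolicy above amounts to: pick the action whose estimated
# value is highest in the current state. The method names
# generate_possible_actions and predict_value are assumptions about the
# task / value-function interface, used only to illustrate the idea.
def greedy_action_sketch(task, value_function, state):
    possible_actions = task.generate_possible_actions(state)
    return max(possible_actions, key=lambda action: value_function.predict_value(state, action))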