def run_agent(env, tests_moment, gamma=1, theta=1e-8):
    """Run policy iteration on ``env`` and evaluate the resulting greedy policy.

    The environment wrapper returned by ``enviroment_choose.env_choose(env)``
    is stored in the module-level ``_ENVIROMENT_CLASS``. After
    ``policy_iteration`` returns, 100 tests of 100 episodes each are played
    by always taking the ``argmax`` action of the policy for the current
    state, and every metric reported by the wrapper is averaged per test.

    Args:
        env: Environment handed to ``env_choose``, ``policy_iteration`` and
            the wrapper's ``reset_env`` / ``test_policy`` calls.
        tests_moment: Accepted but not referenced in this function.
        gamma: Forwarded to ``policy_iteration`` as ``gamma``.
        theta: Forwarded to ``policy_iteration`` as ``theta``.

    Returns:
        A dict with two keys:
        ``"agent_info"`` -> ``{"policy": ..., "state_action_table": ...}``
        (items 0 and 1 of the ``policy_iteration`` result), and
        ``"tests_result"`` -> ``{metric_name: [one average per test]}``.
    """
    global _ENVIROMENT_CLASS
    _ENVIROMENT_CLASS = enviroment_choose.env_choose(env)

    solution = policy_iteration(env, gamma=gamma, theta=theta)
    agent_info = {
        "policy": solution[0],
        "state_action_table": solution[1],
    }

    # ---- Testing ----
    # Ask the environment wrapper which kinds of test metrics it can report.
    metric_names = _ENVIROMENT_CLASS.type_test()
    n_test = 100
    n_episodes_test = 100

    per_test_averages = []
    for _ in tqdm(range(n_test)):
        totals = dict.fromkeys(metric_names, 0)

        # Each test is made of n_episodes_test "episodes".
        for _episode in range(n_episodes_test):
            state = _ENVIROMENT_CLASS.reset_env(env)
            done = False
            while not done:
                # Always exploit: take the best action the policy learned.
                action = np.argmax(agent_info["policy"][state])
                step_info = _ENVIROMENT_CLASS.test_policy(env, action)
                state = step_info["env_info"]["next_state"]
                done = step_info["env_info"]["done"]
                # NOTE(review): metrics are accumulated on every step of the
                # episode -- confirm this nesting against the original file.
                for name in metric_names:
                    totals[name] += step_info[name]

        # Turn the accumulated totals into per-episode averages.
        for name in metric_names:
            totals[name] = totals[name] / n_episodes_test
        per_test_averages.append(totals)

    # Regroup: one list of per-test averages for every metric name.
    grouped_results = {
        name: [averages[name] for averages in per_test_averages]
        for name in metric_names
    }
    return {"agent_info": agent_info, "tests_result": grouped_results}
def run_agent(env, tests_moment, n_games, n_episodes, alpha=0.1, gamma=0.6,
              epsilon=0.1, n_step=10, lambd=0.92):
    """Configure the module-level state and run ``sarsa_lambda``.

    Every argument is copied into a module-level global, and a
    ``QEstimator`` with ``trace=True`` is stored in ``_ESTIMATOR``.
    ``sarsa_lambda()`` is then called without arguments (presumably it
    reads those globals -- verify against its definition) and its test
    results are regrouped by test type.

    Args:
        env: Environment passed to ``enviroment_choose.env_choose`` and to
            ``QEstimator``; stored in ``_ENV``.
        tests_moment: Stored in ``_TESTS_MOMENT``.
        n_games: Stored in ``_N_GAMES``.
        n_episodes: Stored in ``_N_EPISODES``.
        alpha: Stored in ``_ALPHA``; also the estimator's ``step_size``.
        gamma: Stored in ``_GAMMA``.
        epsilon: Stored in ``_EPSILON``.
        n_step: Stored in ``_N_STEP``.
        lambd: Stored in ``_LAMBDA``.

    Returns:
        ``{"agent_info": results["agent_info"],
        "tests_result": {test_type: [value for each test]}}`` where
        ``results`` is the value returned by ``sarsa_lambda()``.
    """
    global _ENVIROMENT_CLASS, _ENV, _N_GAMES, _N_EPISODES
    global _ALPHA, _GAMMA, _EPSILON, _LAMBDA, _N_STEP
    global _ESTIMATOR, _TESTS_MOMENT

    _ENVIROMENT_CLASS = enviroment_choose.env_choose(env)
    _ENV = env
    _N_GAMES, _N_EPISODES = n_games, n_episodes
    _ALPHA, _GAMMA, _EPSILON = alpha, gamma, epsilon
    _LAMBDA, _N_STEP = lambd, n_step
    _ESTIMATOR = QEstimator(
        env=_ENV,
        step_size=_ALPHA,
        num_tilings=_ENVIROMENT_CLASS.num_tilings(),
        max_size=_ENVIROMENT_CLASS.IHT_max_size(),
        trace=True,
    )
    _TESTS_MOMENT = tests_moment

    results = sarsa_lambda()

    # _TYPE_TEST_LIST is a module-level name that is only read here; it must
    # already be bound by the time sarsa_lambda() has returned.
    grouped_results = {
        kind: [single_test[kind] for single_test in results["tests_result"]]
        for kind in _TYPE_TEST_LIST
    }
    return {
        "agent_info": results["agent_info"],
        "tests_result": grouped_results,
    }
def run_agent(env, tests_moment, n_games, n_episodes, alpha=0.1, gamma=0.6,
              epsilon=0.1, n_step=10):
    """Configure the module-level state and run ``n_step_sarsa``.

    Every argument is copied into a module-level global, then
    ``n_step_sarsa()`` is called without arguments (presumably it reads
    those globals -- verify against its definition) and its test results
    are regrouped by test type.

    Args:
        env: Environment passed to ``enviroment_choose.env_choose``; stored
            in ``_ENV``.
        tests_moment: Stored in ``_TESTS_MOMENT``.
        n_games: Stored in ``_N_GAMES``.
        n_episodes: Stored in ``_N_EPISODES``.
        alpha: Stored in ``_ALPHA``.
        gamma: Stored in ``_GAMMA``.
        epsilon: Stored in ``_EPSILON``.
        n_step: Stored in ``_N_STEP``.

    Returns:
        ``{"agent_info": results["agent_info"],
        "tests_result": {test_type: [value for each test]}}`` where
        ``results`` is the value returned by ``n_step_sarsa()``.
    """
    global _ENVIROMENT_CLASS, _ENV, _N_GAMES, _N_EPISODES
    global _ALPHA, _GAMMA, _EPSILON, _N_STEP, _TESTS_MOMENT

    _ENVIROMENT_CLASS = enviroment_choose.env_choose(env)
    _ENV = env
    _N_GAMES, _N_EPISODES = n_games, n_episodes
    _ALPHA, _GAMMA, _EPSILON = alpha, gamma, epsilon
    _N_STEP = n_step
    _TESTS_MOMENT = tests_moment

    results = n_step_sarsa()

    # _TYPE_TEST_LIST is a module-level name that is only read here; it must
    # already be bound by the time n_step_sarsa() has returned.
    grouped_results = {
        kind: [single_test[kind] for single_test in results["tests_result"]]
        for kind in _TYPE_TEST_LIST
    }
    return {
        "agent_info": results["agent_info"],
        "tests_result": grouped_results,
    }
for i in range(len(tests_i_agent)): for j in range(len(tests_i_agent[i][test_type])): tmp[j][i] = tests_i_agent[i][test_type][j] percentile_90 = [] for i in range(len(tmp)): percentile_90.append(np.percentile(tmp[i], 90)) return percentile_90 if __name__ == '__main__': env_name = input("Insert the enviroment name: ") env = enviroment_choose.env_choose(env_name) tests_moment = input("Select the test type (final, on_run, ten_perc): ") how_group_same_agent = input("Select how group the results of same agent \n" + \ "(Average, 10th percentile, Quartile 1, Median, Quartile 3, 90th percentile): ") number_of_agent_for_type = int( input("Insert the number of best agent for every type of agent: ")) base_path = "docs/" + env_name + "/" + tests_moment + "/" all_agent_tests = {} all_agent_legend = {} agent_type_list = [x[1] for x in os.walk(base_path)] for agent_type in agent_type_list[0]: path = base_path + agent_type