def validate_stochastic_experiment(base_folder, const_actions, env_entry_point):
    """Validate the stochastic environment with constant-control runs.

    One short constant-control experiment is executed per action in
    *const_actions*, each in its own ``k=<action>`` subfolder.  A
    description file maps parameter indices to the actions used.
    Returns silently if the experiment folder could not be prepared.
    """
    experiment_name = "stoch-env_validation"
    experiment_folder = prepare_experiment(base_folder, experiment_name)
    if experiment_folder is None:
        return

    # Record which constant action corresponds to which parameter index.
    description_file = "{}/experiment_description.txt".format(experiment_folder)
    correspondence = "\n".join(
        "{} - {}".format(i, k_s) for i, k_s in enumerate(const_actions)
    )
    with open(description_file, "w") as f:
        f.write(gen_exp_descr(experiment_name, correspondence))

    for action in const_actions:
        subfolder = "{}/k={}".format(experiment_folder, action)
        os.mkdir(subfolder)
        run_ps_const_control_experiment_with_files(
            env_entry_point=env_entry_point,
            base_folder=subfolder,
            n_episodes=3,
            max_n_steps=200,
            time_step=1,
            p_reff=1.3,
            const_action=action,
            log_level=logging.INFO,
            p_reff_amplitude=0,
            p_reff_period=200,
            get_seed=lambda: 23)  # fixed seed for reproducibility
def validation_experiment(base_folder, env_entry_point):
    """Run a single baseline Q-learning experiment to validate the setup.

    Uses the module-level hyperparameter constants and writes a
    description file (with no parameter correspondence) into the
    prepared experiment folder.  Returns silently if the folder could
    not be prepared.
    """
    experiment_name = "setup_validation"
    experiment_folder = prepare_experiment(base_folder, experiment_name)
    if experiment_folder is None:
        return

    description_file = "{}/experiment_description.txt".format(experiment_folder)
    with open(description_file, "w") as f:
        # No varying parameters in this experiment, hence None.
        f.write(gen_exp_descr(experiment_name, None))

    train_test = lambda env: ps_train_test_osp_ql(
        ps_env=env,
        max_number_of_steps=MAX_NUMBER_OF_STEPS,
        n_episodes=N_EPISODES,
        rand_qtab=RAND_QTAB,
        learning_rate=LEARNING_RATE,
        discount_factor=DISCOUNT_FACTOR,
        exploration_rate=EXPLORATION_RATE,
        exploration_decay_rate=EXPLORATION_DECAY_RATE,
        k_s=ACTIONS,
        visualize=VISUALIZE)

    run_ps_agent_experiment_with_result_files(
        agent_train_test_once=train_test,
        base_folder=experiment_folder,
        n_repeat=N_REPEAT,
        time_step=TIME_STEP,
        p_reff=P_REF,
        log_level=LOG_LEVEL,
        env_entry_point=env_entry_point,
        compute_reward=None,
    )
def hop_window_experiment(base_folder, env_entry_point, ws_s,
                          experiment_name="hop_window",
                          write_large=True, **kwargs):
    """Sweep window sizes for the windowed Q-learning agent.

    For each window size in *ws_s* a windowed training/testing run is
    executed in a ``window_size=<ws>`` subfolder; the hop size is set
    equal to the window size.  Extra keyword arguments are forwarded to
    the experiment runner.  Returns silently if the experiment folder
    could not be prepared.
    """
    experiment_folder = prepare_experiment(base_folder, experiment_name)
    if experiment_folder is None:
        return

    # Map parameter indices to the window sizes they represent.
    description_file = "{}/experiment_description.txt".format(experiment_folder)
    correspondence = "\n".join(
        "{} - window size = {}".format(i, ws) for i, ws in enumerate(ws_s)
    )
    with open(description_file, "w") as f:
        f.write(gen_exp_descr(experiment_name, correspondence))

    for ws in ws_s:
        subfolder = "{}/window_size={}".format(experiment_folder, ws)
        os.mkdir(subfolder)
        run_ps_agent_experiment_with_result_files(
            agent_train_test_once=lambda env: ps_train_test_window_osp_ql(
                ps_env=env,
                max_number_of_steps=MAX_NUMBER_OF_STEPS,
                n_episodes=N_EPISODES,
                rand_qtab=RAND_QTAB,
                learning_rate=LEARNING_RATE,
                discount_factor=DISCOUNT_FACTOR,
                exploration_rate=EXPLORATION_RATE,
                exploration_decay_rate=EXPLORATION_DECAY_RATE,
                k_s=ACTIONS,
                visualize=VISUALIZE,
                n_bins=N_BINS,
                window_size=ws,
                n_test_episodes=N_TEST_EPISODES,
                n_test_steps=N_TEST_STEPS,
                hop_size=ws),  # hop size tracks the window size
            base_folder=subfolder,
            n_repeat=N_REPEAT,
            time_step=TIME_STEP,
            p_reff=P_REF,
            log_level=LOG_LEVEL,
            env_entry_point=env_entry_point,
            compute_reward=None,
            write_large=write_large,
            **kwargs)
def exploration_experiment(
        base_folder,
        explor_params,
        env_entry_point,
        experiment_name="exploration(rate_and_discount)_variation"):
    """Sweep (exploration rate, exploration decay) pairs.

    Each pair from *explor_params* is run in its own
    ``expl_rate=<r>_decay=<d>`` subfolder using the module-level
    hyperparameter constants for everything else.  Returns silently if
    the experiment folder could not be prepared.
    """
    experiment_folder = prepare_experiment(base_folder, experiment_name)
    if experiment_folder is None:
        return

    description_file = "{}/experiment_description.txt".format(experiment_folder)
    correspondence = "\n".join(
        "{} - exploration rate = {}, exploration rate decay = {}".format(
            i, pair[0], pair[1])
        for i, pair in enumerate(explor_params)
    )
    with open(description_file, "w") as f:
        f.write(gen_exp_descr(experiment_name, correspondence))

    for rate, decay in explor_params:
        subfolder = "{}/expl_rate={}_decay={}".format(
            experiment_folder, rate, decay)
        os.mkdir(subfolder)
        run_ps_agent_experiment_with_result_files(
            agent_train_test_once=lambda env: ps_train_test_osp_ql(
                ps_env=env,
                max_number_of_steps=MAX_NUMBER_OF_STEPS,
                n_episodes=N_EPISODES,
                rand_qtab=RAND_QTAB,
                learning_rate=LEARNING_RATE,
                discount_factor=DISCOUNT_FACTOR,
                exploration_rate=rate,
                exploration_decay_rate=decay,
                k_s=ACTIONS,
                visualize=VISUALIZE,
                n_test_episodes=N_TEST_EPISODES),
            base_folder=subfolder,
            n_repeat=N_REPEAT,
            time_step=TIME_STEP,
            p_reff=P_REF,
            log_level=LOG_LEVEL,
            env_entry_point=env_entry_point,
            compute_reward=None,
        )
def reward_experiment(base_folder, env_entry_point, compute_reward_s,
                      experiment_name="reward_variation"):
    """Sweep reward functions.

    *compute_reward_s* is an iterable of ``(name, function)`` pairs.
    Each pair is run in its own ``reward-<name>`` subfolder with the
    module-level hyperparameter constants.  Returns silently if the
    experiment folder could not be prepared.
    """
    experiment_folder = prepare_experiment(base_folder, experiment_name)
    if experiment_folder is None:
        return

    description_file = "{}/experiment_description.txt".format(experiment_folder)
    # Index-to-reward-name mapping for the description file.
    correspondence = "\n".join(
        "{} - reward = {}".format(i, comp_rew[0])
        for i, comp_rew in enumerate(compute_reward_s)
    )
    with open(description_file, "w") as f:
        f.write(gen_exp_descr(experiment_name, correspondence))

    for reward_name, reward_func in compute_reward_s:
        subfolder = "{}/reward-{}".format(experiment_folder, reward_name)
        os.mkdir(subfolder)
        run_ps_agent_experiment_with_result_files(
            agent_train_test_once=lambda env: ps_train_test_osp_ql(
                ps_env=env,
                max_number_of_steps=MAX_NUMBER_OF_STEPS,
                n_episodes=N_EPISODES,
                rand_qtab=RAND_QTAB,
                learning_rate=LEARNING_RATE,
                discount_factor=DISCOUNT_FACTOR,
                exploration_rate=EXPLORATION_RATE,
                exploration_decay_rate=EXPLORATION_DECAY_RATE,
                k_s=ACTIONS,
                visualize=VISUALIZE,
                n_test_episodes=N_TEST_EPISODES),
            base_folder=subfolder,
            n_repeat=N_REPEAT,
            time_step=TIME_STEP,
            p_reff=P_REF,
            log_level=LOG_LEVEL,
            env_entry_point=env_entry_point,
            compute_reward=reward_func,
        )
def best_combination_experiment_tsp(base_folder, env_entry_point, t_s,
                                    experiment_name="step_once_consid_p_reff",
                                    **kwargs):
    """Sweep simulation time steps for the TSP Q-learning agent.

    For each time step in *t_s* a run is executed in a
    ``time_step=<t>`` subfolder; the per-episode step budget is scaled
    down so that total simulated time stays roughly constant.  Extra
    keyword arguments are forwarded to the experiment runner.  Returns
    silently if the experiment folder could not be prepared.
    """
    experiment_folder = prepare_experiment(base_folder, experiment_name)
    if experiment_folder is None:
        return

    description_file = "{}/experiment_description.txt".format(experiment_folder)
    correspondence = "\n".join(
        "{} - time_step = {}".format(i, t) for i, t in enumerate(t_s)
    )
    with open(description_file, "w") as f:
        f.write(gen_exp_descr(experiment_name, correspondence))

    for t in t_s:
        subfolder = "{}/time_step={}".format(experiment_folder, t)
        os.mkdir(subfolder)
        # Larger time steps get proportionally fewer steps per episode.
        max_n_steps = MAX_NUMBER_OF_STEPS // t
        run_ps_agent_experiment_with_result_files(
            agent_train_test_once=lambda env: ps_train_test_tsp_ql(
                ps_env=env,
                max_number_of_steps=max_n_steps,
                n_episodes=N_EPISODES,
                rand_qtab=RAND_QTAB,
                learning_rate=LEARNING_RATE,
                discount_factor=DISCOUNT_FACTOR,
                exploration_rate=EXPLORATION_RATE,
                exploration_decay_rate=EXPLORATION_DECAY_RATE,
                k_s=ACTIONS,
                visualize=VISUALIZE,
                n_test_episodes=N_TEST_EPISODES),
            base_folder=subfolder,
            n_repeat=N_REPEAT,
            time_step=t,
            p_reff=P_REF,
            log_level=LOG_LEVEL,
            env_entry_point=env_entry_point,
            compute_reward=None,
            **kwargs)