import logging
import os

# NOTE: the experiment helpers (prepare_experiment, gen_exp_descr,
# run_ps_const_control_experiment_with_files,
# run_ps_agent_experiment_with_result_files, ps_train_test_*_ql) and the
# upper-case hyper-parameter constants (MAX_NUMBER_OF_STEPS, N_EPISODES, ...)
# are assumed to be defined elsewhere in this module/package.


def validate_stochastic_experiment(base_folder, const_actions,
                                   env_entry_point):
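    """Validate the stochastic environment under constant control actions.

    For every action in const_actions a constant-control experiment is run
    in its own k=<action> subfolder; the index-to-action mapping is written
    to experiment_description.txt.
    """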
    experiment_name = "stoch-env_validation"
    experiment_folder = prepare_experiment(base_folder, experiment_name)

    if experiment_folder is None:
        return

    description_file = "{}/experiment_description.txt".format(
        experiment_folder)
    with open(description_file, "w") as f:
        param_i_correspondence = "\n".join(
            ["{} - {}".format(i, k_s) for i, k_s in enumerate(const_actions)])
        f.write(gen_exp_descr(experiment_name, param_i_correspondence))

    for a in const_actions:
        subfolder = "{}/k={}".format(experiment_folder, a)
        os.mkdir(subfolder)
        run_ps_const_control_experiment_with_files(
            env_entry_point=env_entry_point,
            base_folder=subfolder,
            n_episodes=3,
            max_n_steps=200,
            time_step=1,
            p_reff=1.3,
            const_action=a,
            log_level=logging.INFO,
            p_reff_amplitude=0,
            p_reff_period=200,
            get_seed=lambda: 23)  # fixed seed, so validation runs are reproducible


def validation_experiment(base_folder, env_entry_point):
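    """Validate the experiment setup.

    Runs the ps_train_test_osp_ql agent with the default module-level
    hyper-parameters and writes the result files to the experiment folder.
    """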
    experiment_name = "setup_validation"
    experiment_folder = prepare_experiment(base_folder, experiment_name)

    if experiment_folder is None:
        return

    description_file = "{}/experiment_description.txt".format(
        experiment_folder)
    with open(description_file, "w") as f:
        f.write(gen_exp_descr(experiment_name, None))
    run_ps_agent_experiment_with_result_files(
        agent_train_test_once=lambda env: ps_train_test_osp_ql(
            ps_env=env,
            max_number_of_steps=MAX_NUMBER_OF_STEPS,
            n_episodes=N_EPISODES,
            rand_qtab=RAND_QTAB,
            learning_rate=LEARNING_RATE,
            discount_factor=DISCOUNT_FACTOR,
            exploration_rate=EXPLORATION_RATE,
            exploration_decay_rate=EXPLORATION_DECAY_RATE,
            k_s=ACTIONS,
            visualize=VISUALIZE),
        base_folder=experiment_folder,
        n_repeat=N_REPEAT,
        time_step=TIME_STEP,
        p_reff=P_REF,
        log_level=LOG_LEVEL,
        env_entry_point=env_entry_point,
        compute_reward=None,
    )


def hop_window_experiment(base_folder,
                          env_entry_point,
                          ws_s,
                          experiment_name="hop_window",
                          write_large=True,
                          **kwargs):
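    """Vary the window size of the ps_train_test_window_osp_ql agent.

    For every window size in ws_s the agent is trained and tested in its own
    window_size=<ws> subfolder; the hop size is set equal to the window size.
    """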

    experiment_folder = prepare_experiment(base_folder, experiment_name)

    if experiment_folder is None:
        return

    description_file = "{}/experiment_description.txt".format(
        experiment_folder)
    with open(description_file, "w") as f:
        param_i_correspondence = "\n".join([
            "{} - window size = {}".format(i, ws) for i, ws in enumerate(ws_s)
        ])
        f.write(gen_exp_descr(experiment_name, param_i_correspondence))

    for ws in ws_s:
        subfolder = "{}/window_size={}".format(experiment_folder, ws)
        os.mkdir(subfolder)

        run_ps_agent_experiment_with_result_files(
            agent_train_test_once=lambda env: ps_train_test_window_osp_ql(
                ps_env=env,
                max_number_of_steps=MAX_NUMBER_OF_STEPS,
                n_episodes=N_EPISODES,
                rand_qtab=RAND_QTAB,
                learning_rate=LEARNING_RATE,
                discount_factor=DISCOUNT_FACTOR,
                exploration_rate=EXPLORATION_RATE,
                exploration_decay_rate=EXPLORATION_DECAY_RATE,
                k_s=ACTIONS,
                visualize=VISUALIZE,
                n_bins=N_BINS,
                window_size=ws,
                n_test_episodes=N_TEST_EPISODES,
                n_test_steps=N_TEST_STEPS,
                hop_size=ws),
            base_folder=subfolder,
            n_repeat=N_REPEAT,
            time_step=TIME_STEP,
            p_reff=P_REF,
            log_level=LOG_LEVEL,
            env_entry_point=env_entry_point,
            compute_reward=None,
            write_large=write_large,
            **kwargs)


def exploration_experiment(
        base_folder,
        explor_params,
        env_entry_point,
        experiment_name="exploration(rate_and_discount)_variation"):

    experiment_folder = prepare_experiment(base_folder, experiment_name)

    if experiment_folder is None:
        return

    description_file = "{}/experiment_description.txt".format(
        experiment_folder)
    with open(description_file, "w") as f:
        param_i_correspondence = "\n".join([
            "{} - exploration rate = {}, exploration rate decay = {}".format(
                i, params[0], params[1])
            for i, params in enumerate(explor_params)
        ])
        f.write(gen_exp_descr(experiment_name, param_i_correspondence))

    for expl_rate, expl_decay in explor_params:
        subfolder = "{}/expl_rate={}_decay={}".format(experiment_folder,
                                                      expl_rate, expl_decay)
        os.mkdir(subfolder)
        run_ps_agent_experiment_with_result_files(
            agent_train_test_once=lambda env: ps_train_test_osp_ql(
                ps_env=env,
                max_number_of_steps=MAX_NUMBER_OF_STEPS,
                n_episodes=N_EPISODES,
                rand_qtab=RAND_QTAB,
                learning_rate=LEARNING_RATE,
                discount_factor=DISCOUNT_FACTOR,
                exploration_rate=expl_rate,
                exploration_decay_rate=expl_decay,
                k_s=ACTIONS,
                visualize=VISUALIZE,
                n_test_episodes=N_TEST_EPISODES),
            base_folder=subfolder,
            n_repeat=N_REPEAT,
            time_step=TIME_STEP,
            p_reff=P_REF,
            log_level=LOG_LEVEL,
            env_entry_point=env_entry_point,
            compute_reward=None,
        )


def reward_experiment(base_folder,
                      env_entry_point,
                      compute_reward_s,
                      experiment_name="reward_variation"):
    experiment_folder = prepare_experiment(base_folder, experiment_name)

    if experiment_folder is None:
        return

    description_file = "{}/experiment_description.txt".format(
        experiment_folder)

    param_i_correspondence = "\n".join([
        "{} - reward = {}".format(i, comp_rew[0])
        for i, comp_rew in enumerate(compute_reward_s)
    ])

    with open(description_file, "w") as f:
        f.write(gen_exp_descr(experiment_name, param_i_correspondence))

    for comp_rew in compute_reward_s:
        name, func = comp_rew
        subfolder = "{}/reward-{}".format(experiment_folder, name)
        os.mkdir(subfolder)
        run_ps_agent_experiment_with_result_files(
            agent_train_test_once=lambda env: ps_train_test_osp_ql(
                ps_env=env,
                max_number_of_steps=MAX_NUMBER_OF_STEPS,
                n_episodes=N_EPISODES,
                rand_qtab=RAND_QTAB,
                learning_rate=LEARNING_RATE,
                discount_factor=DISCOUNT_FACTOR,
                exploration_rate=EXPLORATION_RATE,
                exploration_decay_rate=EXPLORATION_DECAY_RATE,
                k_s=ACTIONS,
                visualize=VISUALIZE,
                n_test_episodes=N_TEST_EPISODES),
            base_folder=subfolder,
            n_repeat=N_REPEAT,
            time_step=TIME_STEP,
            p_reff=P_REF,
            log_level=LOG_LEVEL,
            env_entry_point=env_entry_point,
            compute_reward=func,
        )


def best_combination_experiment_tsp(base_folder,
                                    env_entry_point,
                                    t_s,
                                    experiment_name="step_once_consid_p_reff",
                                    **kwargs):
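    """Vary the simulation time step for the ps_train_test_tsp_ql agent.

    For every time step in t_s the per-episode step limit is scaled as
    MAX_NUMBER_OF_STEPS // t (keeping the simulated horizon roughly constant)
    and the results are written to a time_step=<t> subfolder.
    """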

    experiment_folder = prepare_experiment(base_folder, experiment_name)

    if experiment_folder is None:
        return

    description_file = "{}/experiment_description.txt".format(
        experiment_folder)
    with open(description_file, "w") as f:
        param_i_correspondence = "\n".join(
            ["{} - time_step = {}".format(i, t) for i, t in enumerate(t_s)])
        f.write(gen_exp_descr(experiment_name, param_i_correspondence))

    for t in t_s:
        subfolder = "{}/time_step={}".format(experiment_folder, t)
        os.mkdir(subfolder)
        max_n_steps = MAX_NUMBER_OF_STEPS // t
        run_ps_agent_experiment_with_result_files(
            agent_train_test_once=lambda env: ps_train_test_tsp_ql(
                ps_env=env,
                max_number_of_steps=max_n_steps,
                n_episodes=N_EPISODES,
                rand_qtab=RAND_QTAB,
                learning_rate=LEARNING_RATE,
                discount_factor=DISCOUNT_FACTOR,
                exploration_rate=EXPLORATION_RATE,
                exploration_decay_rate=EXPLORATION_DECAY_RATE,
                k_s=ACTIONS,
                visualize=VISUALIZE,
                n_test_episodes=N_TEST_EPISODES),
            base_folder=subfolder,
            n_repeat=N_REPEAT,
            time_step=t,
            p_reff=P_REF,
            log_level=LOG_LEVEL,
            env_entry_point=env_entry_point,
            compute_reward=None,
            **kwargs)
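

# Illustrative usage sketch (not part of the original module): how these
# experiment drivers might be invoked. The entry-point string, output folder,
# and parameter values below are assumptions for illustration only.
if __name__ == "__main__":
    env_entry_point = "ps_env:PSEnv"   # hypothetical gym-style entry point
    base_folder = "results"            # hypothetical output folder

    # Sanity-check the setup first, then sweep a few parameters.
    validation_experiment(base_folder, env_entry_point)
    validate_stochastic_experiment(base_folder,
                                   const_actions=[0.5, 1.0, 2.0],
                                   env_entry_point=env_entry_point)
    exploration_experiment(base_folder,
                           explor_params=[(0.1, 0.9), (0.5, 0.99)],
                           env_entry_point=env_entry_point)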