Example #1
# Imports inferred for this snippet (assuming gym and stable-baselines v2);
# the *EnvWrapper classes, EnvVariables, normalize_env, and logger are
# project-local and not shown here.
import os

import gym
from gym.wrappers import TimeLimit
from stable_baselines.bench import Monitor
from stable_baselines.common.vec_env import DummyVecEnv
def make_custom_env(
    seed,
    sb_version,
    env_kwargs: EnvVariables = None,
    env_name="CartPole-v1",
    continue_learning=False,
    log_dir=None,
    algo_name="ppo2",
    evaluate=False,
    evaluate_during_learning=False,
    normalize_kwargs=None,
    continue_learning_suffix="continue_learning",
):
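    # Builds the environment for the given algorithm: a custom parameterized
    # wrapper plus TimeLimit for the known envs, Monitor logging during
    # training, optional normalization, and DummyVecEnv where ppo2 needs it.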
    orig_log_dir = log_dir
    if continue_learning and log_dir:
        log_dir = log_dir + "_" + continue_learning_suffix + "/"

    if normalize_kwargs is None:
        normalize_kwargs = {}

    info_keywords = ()

    if env_name == "CartPole-v1":
        cartpole_env_params = env_kwargs.instantiate_env()
        env = CartPoleEnvWrapper(**cartpole_env_params)
        env = TimeLimit(env, max_episode_steps=500)

    elif env_name == "Pendulum-v0":
        pendulum_env_params = env_kwargs.instantiate_env()
        env = PendulumEnvWrapper(**pendulum_env_params)
        env = TimeLimit(env, max_episode_steps=200)

    elif env_name == "MountainCar-v0" and algo_name != "sac":
        mountaincar_env_params = env_kwargs.instantiate_env()
        env = MountainCarEnvWrapper(**mountaincar_env_params)
        env = TimeLimit(env, max_episode_steps=200)

    elif env_name == "MountainCar-v0" and algo_name == "sac":
        mountaincar_env_params = env_kwargs.instantiate_env()
        env = MountainCarEnvWrapper(**mountaincar_env_params)
        env = TimeLimit(env, max_episode_steps=999)

    elif env_name == "Acrobot-v1":
        acrobot_env_params = env_kwargs.instantiate_env()
        env = AcrobotEnvWrapper(**acrobot_env_params)
        env = TimeLimit(env, max_episode_steps=500)

    else:
        env = gym.make(env_name)

    if log_dir is not None and not evaluate:
        log_file = os.path.join(log_dir, "0")
        logger.debug("Saving monitor files in {}".format(log_file))
        env = Monitor(env, log_file, info_keywords=info_keywords)

    if len(normalize_kwargs) > 0:
        env = normalize_env(
            env=env,
            sb_version=sb_version,
            orig_log_dir=orig_log_dir,
            continue_learning=continue_learning,
            evaluate=evaluate,
            evaluate_during_learning=evaluate_during_learning,
            normalize_kwargs=normalize_kwargs,
        )

    if (len(normalize_kwargs) == 0 and not evaluate_during_learning
            and algo_name == "ppo2" and (evaluate or continue_learning)):
        env = DummyVecEnv([lambda: env])

    env.seed(seed)
    return env
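
A minimal usage sketch for the function above; the EnvVariables subclass, version
tag, and log directory are hypothetical placeholders:

    env_kwargs = CartPoleEnvVariables()   # hypothetical EnvVariables subclass
    env = make_custom_env(
        seed=0,
        sb_version="sb2",                 # assumed tag for stable-baselines 2
        env_kwargs=env_kwargs,
        env_name="CartPole-v1",
        log_dir="logs/cartpole_ppo2",
        algo_name="ppo2",
    )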
Example #2
    def __init__(
        self,
        agent: AbstractAgent,
        num_iterations: int,
        algo_name: str,
        env_name: str,
        tb_log_name: str,
        continue_learning_suffix: str,
        env_variables: EnvVariables,
        param_names=None,
        runs_for_probability_estimation: int = 1,
        buffer_file: str = None,
        archive_file: str = None,
        executions_skipped_file: str = None,
        parallelize_search: bool = False,
        monitor_search_every: int = -1,
        binary_search_epsilon: float = 0.05,
        start_search_time: float = None,
        starting_progress_report_number: int = 0,
        stop_at_first_iteration: bool = False,
        exp_suffix: str = None,
    ):
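        # Restores a previous search run when buffer/archive files are given;
        # otherwise creates a fresh save directory and seeds the buffer with
        # the initial environment, assumed to satisfy the performance predicate.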
        assert agent, "agent should have a value: {}".format(agent)
        assert algo_name, "algo_name should have a value: {}".format(algo_name)
        assert env_name, "env_name should have a value: {}".format(env_name)

        self.agent = agent
        self.num_iterations = num_iterations
        self.init_env_variables = env_variables
        self.previous_num_iterations = None
        self.start_time = time.time()
        self.logger = Log("Random")
        self.param_names = param_names
        self.all_params = env_variables.instantiate_env()
        self.runs_for_probability_estimation = runs_for_probability_estimation
        self.buffer_file = buffer_file
        self.archive_file = archive_file
        self.parallelize_search = parallelize_search
        self.stop_at_first_iteration = stop_at_first_iteration
        self.exp_suffix = exp_suffix

        # keep the attribute defined even when param_names is None
        self.param_names_string = "_".join(param_names) if param_names else ""

        # TODO: refactor buffer restoring in abstract class extended by search algo
        #  (for now only random search and alphatest)
        if buffer_file:
            previously_saved_buffer = read_saved_buffer(buffer_file=buffer_file)
            self.algo_save_dir = os.path.dirname(buffer_file)
            self.logger.debug("Algo save dir from restored execution: {}".format(
                self.algo_save_dir))
            self.buffer_env_predicate_pairs = BufferEnvPredicatePairs(
                save_dir=self.algo_save_dir)
            # the skipped-executions buffer is appended to below when restoring
            self.buffer_executions_skipped = BufferExecutionsSkipped(
                save_dir=self.algo_save_dir)
            self.archive = Archive(save_dir=self.algo_save_dir,
                                   epsilon=binary_search_epsilon)

            # restore buffer
            for buffer_item in previously_saved_buffer:
                previous_env_variables = instantiate_env_variables(
                    algo_name=algo_name,
                    discrete_action_space=self.all_params["discrete_action_space"],
                    env_name=env_name,
                    param_names=param_names,
                    env_values=buffer_item.get_env_values(),
                )
                self.buffer_env_predicate_pairs.append(
                    EnvPredicatePair(
                        env_variables=previous_env_variables,
                        pass_probability=buffer_item.get_pass_probability(),
                        predicate=buffer_item.is_predicate(),
                        regression_probability=buffer_item.get_regression_probability(),
                        probability_estimation_runs=buffer_item.get_probability_estimation_runs(),
                        regression_estimation_runs=buffer_item.get_regression_estimation_runs(),
                        model_dirs=buffer_item.get_model_dirs(),
                    ))
            assert archive_file, (
                "when the buffer file is available, the archive file must also "
                "be available to restore a previous execution")
            previous_num_iterations_buffer = get_result_file_iteration_number(
                filename=buffer_file)
            previous_num_iterations_archive = get_result_file_iteration_number(
                filename=archive_file)
            assert previous_num_iterations_buffer == previous_num_iterations_archive, (
                "The two iteration numbers must coincide: {}, {}".format(
                    previous_num_iterations_buffer,
                    previous_num_iterations_archive))
            previous_num_iterations = previous_num_iterations_buffer + 1

            self.previous_num_iterations = previous_num_iterations
            self.logger.info(
                "Restored previous execution of {} iterations.".format(
                    previous_num_iterations))

            # restore archive
            previously_saved_archive = read_saved_archive(
                archive_file=archive_file)
            t_env_variables = None
            f_env_variables = None
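            # archive entries are paired: each passing (predicate=True) env is
            # matched with a failing (predicate=False) env before being appended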
            for env_values, predicate in previously_saved_archive:
                previous_env_variables = instantiate_env_variables(
                    algo_name=algo_name,
                    discrete_action_space=self.all_params["discrete_action_space"],
                    env_name=env_name,
                    param_names=param_names,
                    env_values=env_values,
                )
                if predicate:
                    t_env_variables = previous_env_variables
                else:
                    f_env_variables = previous_env_variables

                if t_env_variables and f_env_variables:
                    self.archive.append(t_env_variables=t_env_variables,
                                        f_env_variables=f_env_variables)
                    t_env_variables = None
                    f_env_variables = None

            # restore executions skipped
            if executions_skipped_file:
                previously_saved_executions_skipped = read_saved_buffer_executions_skipped(
                    buffer_executions_skipped_file=executions_skipped_file)
                for buffer_executions_skipped_item in previously_saved_executions_skipped:
                    previous_env_variables_skipped = instantiate_env_variables(
                        algo_name=algo_name,
                        discrete_action_space=self.all_params["discrete_action_space"],
                        env_name=env_name,
                        param_names=param_names,
                        env_values=buffer_executions_skipped_item.env_values_skipped,
                    )
                    env_predicate_pair_skipped = EnvPredicatePair(
                        env_variables=previous_env_variables_skipped,
                        predicate=buffer_executions_skipped_item.predicate)
                    previous_env_variables_executed = instantiate_env_variables(
                        algo_name=algo_name,
                        discrete_action_space=self.all_params["discrete_action_space"],
                        env_name=env_name,
                        param_names=param_names,
                        env_values=buffer_executions_skipped_item.env_values_executed,
                    )
                    env_predicate_pair_executed = EnvPredicatePair(
                        env_variables=previous_env_variables_executed,
                        predicate=buffer_executions_skipped_item.predicate)
                    self.buffer_executions_skipped.append(
                        ExecutionSkipped(
                            env_predicate_pair_skipped=env_predicate_pair_skipped,
                            env_predicate_pair_executed=env_predicate_pair_executed,
                            search_component=buffer_executions_skipped_item.search_component,
                        ))
        else:
            suffix = "n_iterations_"
            if self.param_names:
                suffix += self.param_names_string + "_"
            if self.exp_suffix:
                suffix += self.exp_suffix + "_"
            suffix += str(num_iterations)

            # find the first attempt number whose save directory does not exist yet
            base_dir = os.path.abspath(
                os.path.join(HOME, "random", env_name, algo_name))
            attempt = 0
            while os.path.exists(
                    os.path.join(base_dir, "{}_{}".format(suffix, attempt))):
                attempt += 1
            self.algo_save_dir = os.path.join(base_dir,
                                              "{}_{}".format(suffix, attempt))
            os.makedirs(self.algo_save_dir)
            self.buffer_env_predicate_pairs = BufferEnvPredicatePairs(
                save_dir=self.algo_save_dir)
            # assuming initial env_variables satisfies the predicate of adequate performance
            if self.runs_for_probability_estimation:
                env_predicate_pair = EnvPredicatePair(
                    env_variables=self.init_env_variables,
                    predicate=True,
                    probability_estimation_runs=(
                        [True] * self.runs_for_probability_estimation),
                )
            else:
                env_predicate_pair = EnvPredicatePair(
                    env_variables=self.init_env_variables, predicate=True)
            self.buffer_env_predicate_pairs.append(env_predicate_pair)
            self.buffer_executions_skipped = BufferExecutionsSkipped(
                save_dir=self.algo_save_dir)
            self.archive = Archive(save_dir=self.algo_save_dir,
                                   epsilon=binary_search_epsilon)

        self.env_name = env_name
        self.algo_name = algo_name
        self.tb_log_name = tb_log_name
        self.continue_learning_suffix = continue_learning_suffix
        self.binary_search_epsilon = binary_search_epsilon

        self.runner = Runner(
            agent=self.agent,
            runs_for_probability_estimation=self.runs_for_probability_estimation,
        )

        self.monitor_search_every = monitor_search_every
        self.monitor_progress = None
        if self.monitor_search_every > 0:
            self.monitor_progress = MonitorProgress(
                algo_name=self.algo_name,
                env_name=standardize_env_name(env_name=self.env_name),
                results_dir=self.algo_save_dir,
                param_names_string=self.param_names_string,
                search_type="random",
                start_search_time=start_search_time,
                starting_progress_report_number=starting_progress_report_number,
            )
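
A hypothetical instantiation of the enclosing class; its name is not shown in the
snippet (RandomSearch is assumed here, matching the Log("Random") tag), and the
agent and env_variables arguments stand for project objects:

    search = RandomSearch(
        agent=my_agent,                       # an AbstractAgent implementation
        num_iterations=50,
        algo_name="ppo2",
        env_name="CartPole-v1",
        tb_log_name="tb_random",
        continue_learning_suffix="continue_learning",
        env_variables=my_env_variables,       # an EnvVariables instance
    )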