def __init__(self, rl2_max_path_length, meta_batch_size, task_sampler,
             **inner_algo_args):
    self._inner_algo = RL2NPO(**inner_algo_args)
    self._rl2_max_path_length = rl2_max_path_length
    self._env_spec = self._inner_algo.env_spec
    self._flatten_input = self._inner_algo.flatten_input
    self._policy = self._inner_algo.policy
    self._discount = self._inner_algo.discount
    self._meta_batch_size = meta_batch_size
    self._task_sampler = task_sampler
def __init__(self, rl2_max_path_length, meta_batch_size, task_sampler,
             meta_evaluator, n_epochs_per_eval, **inner_algo_args):
    self._inner_algo = RL2NPO(**inner_algo_args)
    self._rl2_max_path_length = rl2_max_path_length
    self.env_spec = self._inner_algo._env_spec
    self._n_epochs_per_eval = n_epochs_per_eval
    self._policy = self._inner_algo.policy
    self._discount = self._inner_algo._discount
    self._meta_batch_size = meta_batch_size
    self._task_sampler = task_sampler
    self._meta_evaluator = meta_evaluator
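# Hedged sketch of how the meta_evaluator and n_epochs_per_eval arguments
# introduced above might interact during training. The training loop and
# evaluator below are stand-in stubs written for illustration; only the
# attribute names _meta_evaluator and _n_epochs_per_eval come from the
# constructor, and the periodic gating shown is an assumption, not code
# confirmed by this snippet.
class _StubEvaluator:
    def evaluate(self, algo):
        print('meta-evaluating on held-out tasks')

class _StubAlgo:
    def __init__(self, meta_evaluator, n_epochs_per_eval):
        self._meta_evaluator = meta_evaluator
        self._n_epochs_per_eval = n_epochs_per_eval

    def train(self, n_epochs):
        for epoch in range(n_epochs):
            # Evaluate only every n_epochs_per_eval epochs, since
            # meta-evaluation rollouts are comparatively expensive.
            if epoch % self._n_epochs_per_eval == 0:
                self._meta_evaluator.evaluate(self)
            # ...one meta-batch of RL^2 optimization would run here...

_StubAlgo(_StubEvaluator(), n_epochs_per_eval=5).train(n_epochs=10)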
def __init__(self, env_spec, episodes_per_trial, meta_batch_size,
             task_sampler, meta_evaluator, n_epochs_per_eval,
             **inner_algo_args):
    self._env_spec = env_spec
    # The inner algorithm treats a whole trial (episodes_per_trial
    # consecutive episodes) as one long episode, so its env spec gets a
    # proportionally stretched horizon.
    _inner_env_spec = EnvSpec(
        env_spec.observation_space, env_spec.action_space,
        episodes_per_trial * env_spec.max_episode_length)
    self._inner_algo = RL2NPO(env_spec=_inner_env_spec, **inner_algo_args)
    self._rl2_max_episode_length = self._env_spec.max_episode_length
    self._n_epochs_per_eval = n_epochs_per_eval
    self._policy = self._inner_algo.policy
    self._discount = self._inner_algo._discount
    self._meta_batch_size = meta_batch_size
    self._task_sampler = task_sampler
    self._meta_evaluator = meta_evaluator
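# Hedged illustration of the trial-horizon arithmetic above: the inner env
# spec multiplies max_episode_length by episodes_per_trial, so one
# "episode" seen by the inner algorithm spans an entire trial. The akro
# spaces and the `from garage import EnvSpec` import path are assumptions
# about the surrounding library version, not confirmed by this snippet.
import akro
from garage import EnvSpec

outer_spec = EnvSpec(
    observation_space=akro.Box(low=-1.0, high=1.0, shape=(4,)),
    action_space=akro.Discrete(2),
    max_episode_length=100)
episodes_per_trial = 2
inner_spec = EnvSpec(outer_spec.observation_space,
                     outer_spec.action_space,
                     episodes_per_trial * outer_spec.max_episode_length)
assert inner_spec.max_episode_length == 200  # 2 episodes * 100 steps each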