def __init__(
    self,
    env,
    n_bins_obs=10,
    memory_size=100,
    state_preprocess_fn=None,
    state_preprocess_kwargs=None,
):
    """
    Wrap `env` and set up trajectory memory and visit counters.

    Parameters
    ----------
    env:
        Environment to wrap. Its action space must be `spaces.Discrete`;
        its observation space must be `spaces.Box` when no
        `state_preprocess_fn` is given.
    n_bins_obs: int
        Forwarded as `n_bins_obs` to both `DiscreteCounter` instances.
    memory_size: int
        Forwarded to `TrajectoryMemory`.
    state_preprocess_fn: callable, optional
        Stored on the instance; `identity` is used when this is falsy.
    state_preprocess_kwargs: dict, optional
        Stored on the instance; an empty dict is used when this is falsy.
    """
    Wrapper.__init__(self, env)

    # NOTE(review): the original formatting was lost. The action-space
    # check is treated as unconditional and only the observation-space
    # check as depending on the missing preprocess function -- confirm.
    if state_preprocess_fn is None:
        assert isinstance(env.observation_space, spaces.Box)
    assert isinstance(env.action_space, spaces.Discrete)

    # Fall back to defaults for anything the caller left out.
    self.state_preprocess_fn = (
        state_preprocess_fn if state_preprocess_fn else identity
    )
    self.state_preprocess_kwargs = (
        state_preprocess_kwargs if state_preprocess_kwargs else {}
    )

    self.memory = TrajectoryMemory(memory_size)

    # Two independent counters built with the same configuration.
    self.total_visit_counter = DiscreteCounter(
        self.env.observation_space,
        self.env.action_space,
        n_bins_obs=n_bins_obs,
    )
    self.episode_visit_counter = DiscreteCounter(
        self.env.observation_space,
        self.env.action_space,
        n_bins_obs=n_bins_obs,
    )

    self.current_state = None
    # NOTE(review): attribute name is spelled "curret_step" (sic). It is
    # kept unchanged because other methods of the class may refer to it.
    self.curret_step = 0
def __init__(self, env):
    """
    Wrap `env` and expose a `Box` observation space in place of a
    `Discrete` one.

    The wrapped environment's observation space must be `Discrete`
    (asserted). The new space has one entry per discrete value, with
    bounds 0.0 and 1.0 and dtype `np.uint32`.

    Parameters
    ----------
    env:
        Environment to wrap.
    """
    Wrapper.__init__(self, env, wrap_spaces=True)

    discrete_space = self.env.observation_space
    assert isinstance(discrete_space, Discrete)

    # presumably observations are encoded as one-hot vectors of length n
    # -- confirm against the rest of the class
    n_values = discrete_space.n
    self.observation_space = Box(
        low=0.0,
        high=1.0,
        shape=(n_values,),
        dtype=np.uint32,
    )
def __init__(self, env, horizon):
    """
    Wrap `env` and record a horizon together with a step counter.

    Parameters
    ----------
    env:
        Environment to wrap.
    horizon: int
        Stored as `self.horizon`; must be at least 1 (asserted).
    """
    Wrapper.__init__(self, env)

    self.horizon = horizon
    assert self.horizon >= 1

    # Step counter starts at zero.
    self.current_step = 0
def __init__(
    self,
    env,
    uncertainty_estimator_fn,
    uncertainty_estimator_kwargs=None,
    bonus_scale_factor=1.0,
    bonus_max=np.inf,
):
    """
    Wrap `env` and build an uncertainty estimator for it.

    Parameters
    ----------
    env:
        Environment to wrap. Its `observation_space` and `action_space`
        are passed to the estimator constructor.
    uncertainty_estimator_fn: callable or str
        Called as `fn(observation_space, action_space, **kwargs)` to
        build the estimator. A string is first resolved with `load`.
    uncertainty_estimator_kwargs: dict, optional
        Extra keyword arguments for the estimator constructor; an empty
        dict is used when this is falsy.
    bonus_scale_factor: float
        Stored as `self.bonus_scale_factor`.
    bonus_max: float
        Stored as `self.bonus_max`.
    """
    Wrapper.__init__(self, env)

    self.bonus_scale_factor = bonus_scale_factor
    self.bonus_max = bonus_max

    estimator_kwargs = uncertainty_estimator_kwargs or {}

    # A string is resolved to the actual constructor before use.
    if isinstance(uncertainty_estimator_fn, str):
        estimator_factory = load(uncertainty_estimator_fn)
    else:
        estimator_factory = uncertainty_estimator_fn

    self.uncertainty_estimator = estimator_factory(
        env.observation_space,
        env.action_space,
        **estimator_kwargs,
    )
    self.previous_obs = None
def __init__(self, env):
    """
    Wrap `env`; all initialization is delegated to `Wrapper.__init__`.

    Parameters
    ----------
    env:
        Environment to wrap.
    """
    Wrapper.__init__(self, env)
def __init__(self, env, reward_range):
    """
    Wrap `env` and record a bounded reward range.

    Parameters
    ----------
    env:
        Environment to wrap.
    reward_range:
        Indexable pair `(lower, upper)` stored as `self.reward_range`.
        It must satisfy `lower < upper`, and both bounds must be finite
        (asserted).
    """
    Wrapper.__init__(self, env)
    self.reward_range = reward_range

    lower = reward_range[0]
    upper = reward_range[1]

    # The range must be non-empty ...
    assert lower < upper
    # ... and bounded on both sides.
    assert lower > -np.inf
    assert upper < np.inf