Example #1
    def __init__(self, env, normalize_observations=True, normalize_returns=True,
                 clip_observations=10., clip_rewards=10., gamma=0.99, epsilon=1e-8):
        super().__init__(env)

        # Running statistics for observation and return normalization
        self.ob_rms = RunningMeanStd(shape=self.observation_space.shape) if normalize_observations else None
        self.ret_rms = RunningMeanStd(shape=()) if normalize_returns else None
        self.clipob = clip_observations
        self.cliprew = clip_rewards
        # Discounted return accumulator (a scalar: this wrapper handles a single environment)
        self.ret = 0.0
        self.gamma = gamma
        self.epsilon = epsilon
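All of the snippets on this page construct a RunningMeanStd tracker for their normalization statistics. For context, here is a minimal sketch of such a helper, modeled on the parallel-variance update used in OpenAI Baselines; the exact field names in the library these examples come from may differ:

import numpy as np

class RunningMeanStd:
    # Tracks the running mean/variance of a stream of batches
    # using the parallel update of Chan et al.
    def __init__(self, epsilon=1e-4, shape=()):
        self.mean = np.zeros(shape, dtype=np.float64)
        self.var = np.ones(shape, dtype=np.float64)
        self.count = epsilon

    def update(self, x):
        batch_mean = np.mean(x, axis=0)
        batch_var = np.var(x, axis=0)
        batch_count = x.shape[0]

        delta = batch_mean - self.mean
        total_count = self.count + batch_count

        new_mean = self.mean + delta * batch_count / total_count
        m_a = self.var * self.count
        m_b = batch_var * batch_count
        m_2 = m_a + m_b + np.square(delta) * self.count * batch_count / total_count

        self.mean = new_mean
        self.var = m_2 / total_count
        self.count = total_count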
Example #2
    def __init__(self, environment, device, batch_size, buffer_capacity, buffer_initial_size, noise_std_dev,
                 normalize_observations=False):
        self.device = device
        self.batch_size = batch_size
        self.buffer_capacity = buffer_capacity
        self.buffer_initial_size = buffer_initial_size
        self.normalize_observations = normalize_observations

        self.environment = environment

        # Circular-deque backend storing replay transitions
        self.backend = DequeBufferBackend(
            buffer_capacity=self.buffer_capacity,
            observation_space=environment.observation_space,
            action_space=environment.action_space
        )

        self.last_observation = self.environment.reset()

        # Dimensionality of the continuous action space
        len_action_space = self.environment.action_space.shape[-1]

        # Ornstein-Uhlenbeck process providing temporally correlated exploration noise
        self.noise_process = OrnsteinUhlenbeckNoiseProcess(
            np.zeros(len_action_space), float(noise_std_dev) * np.ones(len_action_space)
        )

        # Optional running observation normalization with a fixed clipping range
        self.ob_rms = RunningMeanStd(shape=self.environment.observation_space.shape) if normalize_observations else None
        self.clip_obs = 10.0
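Example #2 draws its exploration noise from an Ornstein-Uhlenbeck process, as is common in DDPG-style agents. A minimal sketch of such a process follows; the constructor arguments (a mean array and a sigma array) mirror the call above, while theta and dt are assumed defaults rather than values taken from the library:

import numpy as np

class OrnsteinUhlenbeckNoiseProcess:
    # Discretized OU process: dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
    def __init__(self, mu, sigma, theta=0.15, dt=1e-2):
        self.mu = mu
        self.sigma = sigma
        self.theta = theta
        self.dt = dt
        self.x_prev = np.zeros_like(mu)

    def __call__(self):
        x = (self.x_prev
             + self.theta * (self.mu - self.x_prev) * self.dt
             + self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.mu.shape))
        self.x_prev = x
        return x

    def reset(self):
        self.x_prev = np.zeros_like(self.mu)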
Example #3
    def __init__(self, environment, device, replay_buffer: ReplayBuffer, discount_factor: typing.Optional[float] = None,
                 normalize_returns: bool = False, forward_steps: int = 1, action_noise: typing.Optional[nn.Module] = None):
        self.environment = environment
        self.device = device
        self.replay_buffer = replay_buffer
        self.normalize_returns = normalize_returns
        self.forward_steps = forward_steps
        self.discount_factor = discount_factor
        self.action_noise = action_noise.to(self.device) if action_noise is not None else None

        if self.normalize_returns:
            assert self.discount_factor is not None, \
                "TransitionReplayEnvRoller must have a discount factor defined if normalize_returns is turned on"

        if self.forward_steps > 1:
            assert self.discount_factor is not None, \
                "TransitionReplayEnvRoller must have a discount factor defined if forward_steps is larger than one"

        self.ret_rms = RunningMeanStd(shape=()) if normalize_returns else None

        # Initial observation
        self.last_observation_cpu = torch.from_numpy(self.environment.reset()).clone()
        self.last_observation = self.last_observation_cpu.to(self.device)

        # Return normalization: clipping threshold and per-environment accumulated returns
        self.clip_obs = 5.0
        self.accumulated_returns = np.zeros(environment.num_envs, dtype=np.float32)
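The accumulated_returns buffer and ret_rms tracker hint at how rewards would be normalized while stepping the environments. A hypothetical sketch follows; the roller's actual step method is not part of this excerpt, and the name _normalize_rewards is illustrative only:

    def _normalize_rewards(self, rewards: np.ndarray, dones: np.ndarray) -> np.ndarray:
        # Maintain a discounted running return for each parallel environment
        self.accumulated_returns = self.accumulated_returns * self.discount_factor + rewards
        self.ret_rms.update(self.accumulated_returns)
        # Scale rewards by the running standard deviation of returns
        rewards = rewards / np.sqrt(self.ret_rms.var + 1e-8)
        # Reset the accumulators for environments whose episodes just ended
        self.accumulated_returns[dones] = 0.0
        return rewards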
Example #4
    def __init__(self,
                 venv,
                 ob=True,
                 ret=True,
                 clipob=10.,
                 cliprew=10.,
                 gamma=0.99,
                 epsilon=1e-8):
        VecEnvWrapper.__init__(self, venv)
        # Running statistics for observations (ob) and returns (ret)
        self.ob_rms = RunningMeanStd(shape=self.observation_space.shape) if ob else None
        self.ret_rms = RunningMeanStd(shape=()) if ret else None
        self.clipob = clipob
        self.cliprew = cliprew
        # One accumulated discounted return per parallel environment
        self.ret = np.zeros(self.num_envs)
        self.gamma = gamma
        self.epsilon = epsilon
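The fields set up in Example #4's constructor map directly onto the step-time normalization in Baselines' VecNormalize. A sketch of that logic, close to the upstream implementation:

    def step_wait(self):
        obs, rews, news, infos = self.venv.step_wait()
        # Track the discounted return per environment for reward scaling
        self.ret = self.ret * self.gamma + rews
        obs = self._obfilt(obs)
        if self.ret_rms:
            self.ret_rms.update(self.ret)
            rews = np.clip(rews / np.sqrt(self.ret_rms.var + self.epsilon),
                           -self.cliprew, self.cliprew)
        # Zero the return accumulator where an episode ended
        self.ret[news] = 0.
        return obs, rews, news, infos

    def _obfilt(self, obs):
        # Standardize and clip observations using the running statistics
        if self.ob_rms:
            self.ob_rms.update(obs)
            obs = np.clip((obs - self.ob_rms.mean) / np.sqrt(self.ob_rms.var + self.epsilon),
                          -self.clipob, self.clipob)
        return obs

Note that rewards are only scaled by the running standard deviation of returns, never mean-centered, so the sign of each reward is preserved.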