def _step(self, action):
    # Reset on the step that follows a terminal step.
    if self._done:
        return self.reset()
    if self._action_spec:
        nest.assert_same_structure(self._action_spec, action)
    self._num_steps += 1
    observation = self._get_observation()
    # Decide whether this step ends the episode: never before _min_duration,
    # always at _max_duration, and with probability _episode_end_probability
    # in between.
    if self._num_steps < self._min_duration:
        self._done = False
    elif self._max_duration and self._num_steps >= self._max_duration:
        self._done = True
    else:
        self._done = self._rng.uniform() < self._episode_end_probability
    # Tile the action across the batch dimension if the environment is
    # batched.
    if self._batch_size:
        action = nest.map_structure(
            lambda t: np.concatenate(
                [np.expand_dims(t, 0)] * self._batch_size), action)
    if self._done:
        reward = self._reward_fn(ds.StepType.LAST, action, observation)
        self._check_reward_shape(reward)
        time_step = ds.termination(observation, action, reward, self._env_id)
        self._num_steps = 0
    else:
        reward = self._reward_fn(ds.StepType.MID, action, observation)
        self._check_reward_shape(reward)
        time_step = ds.transition(observation, action, reward, self._discount,
                                  self._env_id)
    return time_step
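# The termination rule above bounds episode length between _min_duration and
# _max_duration, ending stochastically with probability
# _episode_end_probability in between. A minimal standalone sketch of just
# that rule (plain NumPy; the function name and parameters here are
# illustrative, not part of the library):

import numpy as np

def sample_episode_length(rng, min_duration, max_duration, end_prob):
    """Simulate the episode-termination rule used in _step above."""
    num_steps = 0
    done = False
    while not done:
        num_steps += 1
        if num_steps < min_duration:
            done = False                        # never end early
        elif max_duration and num_steps >= max_duration:
            done = True                         # force an end at the cap
        else:
            done = rng.uniform() < end_prob     # stochastic end in between
    return num_steps

rng = np.random.RandomState(0)
lengths = [sample_episode_length(rng, 3, 10, 0.2) for _ in range(5)]
assert all(3 <= n <= 10 for n in lengths)       # lengths always in [3, 10]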
def _step(self, action):
    # Automatically reset the environment on step if it needs to be reset.
    if self._auto_reset and self._done:
        return self.reset()

    observation, reward, self._done, self._info = self._gym_env.step(action)
    observation = self._to_spec_dtype_observation(observation)
    self._info = nest.map_structure(_as_array, self._info)

    if self._done:
        return ds.termination(
            observation,
            action,
            reward,
            self._reward_spec,
            self._env_id,
            env_info=self._info)
    else:
        return ds.transition(
            observation,
            action,
            reward,
            self._reward_spec,
            self._discount,
            self._env_id,
            env_info=self._info)
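# Because _step resets automatically once the previous step was terminal, a
# driver can call step() in an unbroken loop; the first time step of each new
# episode comes back from reset() rather than from a real transition. A toy
# stand-in illustrating that contract (AutoResetEnv and its dict "time steps"
# are hypothetical, not the library's types):

import numpy as np

class AutoResetEnv:
    """Fixed-length fake episodes with auto-reset on the step after LAST."""

    def __init__(self, episode_len=3):
        self._episode_len = episode_len
        self._t = 0
        self._done = False

    def reset(self):
        self._t = 0
        self._done = False
        return {'obs': np.zeros(2), 'step_type': 'FIRST'}

    def step(self, action):
        if self._done:                  # auto-reset, as in _step above
            return self.reset()
        self._t += 1
        self._done = self._t >= self._episode_len
        return {'obs': np.full(2, float(self._t)),
                'step_type': 'LAST' if self._done else 'MID'}

env = AutoResetEnv()
env.reset()
for _ in range(8):                      # loop runs across episode boundaries
    ts = env.step(action=0)
    print(ts['step_type'])              # MID, MID, LAST, FIRST, MID, ...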