Примеры использования mpi_statistics_scalar в Python

Язык программирования: Python

Пространство имен/Пакет: utils.mpi_tools

Метод/Функция: mpi_statistics_scalar

Примеров на hotexamples.com: 7

Python mpi_statistics_scalar — найдено 7 примеров. Это лучшие примеры кода на Python для utils.mpi_tools.mpi_statistics_scalar, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

 def retrieve_all(self):
     """
     Return the filled part of every buffer and reset the write counters.

     Requires ``self.eps == self.max_batch``. The ``adv`` and ``pos``
     buffers are normalized in place over the filled region, using the
     mean and std reported by ``mpi_statistics_scalar``.

     Returns:
         list: Slices ``[0:ptr]`` of ``obs``, ``act``, ``adv``, ``pos``,
         ``ret`` and ``lgt``, in that order.
     """
     assert self.eps == self.max_batch
     # Capture the filled region before the pointer is reset.
     filled = slice(0, self.ptr)
     self.ptr = self.eps = 0

     # Both statistics are gathered before either buffer is rewritten.
     adv_mean, adv_std = mpi_statistics_scalar(self.adv[filled])
     pos_mean, pos_std = mpi_statistics_scalar(self.pos[filled])
     self.adv[filled] = (self.adv[filled] - adv_mean) / adv_std
     self.pos[filled] = (self.pos[filled] - pos_mean) / pos_std

     buffers = (self.obs, self.act, self.adv, self.pos, self.ret, self.lgt)
     return [buf[filled] for buf in buffers]

Пример #2

Показать файл

Файл: logx.py Проект: thanhkaist/deep_pick_and_place

 def get_stats(self, key):
     """
     Return the statistics of the values stored under ``key``.

     The stored entries are concatenated first when the first entry is a
     NumPy array with at least one dimension; otherwise the list is passed
     through as is. The result is whatever ``mpi_statistics_scalar``
     returns for those values.
     """
     entries = self.epoch_dict[key]
     first = entries[0]
     if isinstance(first, np.ndarray) and len(first.shape) > 0:
         return mpi_statistics_scalar(np.concatenate(entries))
     return mpi_statistics_scalar(entries)

Пример #3

Показать файл

Файл: logx.py Проект: thanhkaist/deep_pick_and_place

    def log_tabular(self, key, val=None, with_min_and_max=False, average_only=False):
        """
        Log one value, or summary statistics of the values stored for ``key``.

        Args:
            key (string): Name of the diagnostic. When logging values that
                were accumulated earlier, this must be the key they were
                stored under in ``self.epoch_dict``.

            val: Value to log directly. Leave as ``None`` to log statistics
                of the accumulated values instead.

            with_min_and_max (bool): If true, also log ``'Max' + key`` and
                ``'Min' + key``.

            average_only (bool): If true, log the mean under ``key`` itself
                and skip the ``'Std' + key`` entry; otherwise the mean is
                logged under ``'Average' + key``.

        In every case the accumulated list for ``key`` is reset afterwards.
        """
        if val is None:
            samples = self.epoch_dict[key]
            head = samples[0]
            # Array-valued entries are flattened into one array first.
            if isinstance(head, np.ndarray) and len(head.shape) > 0:
                samples = np.concatenate(samples)
            stats = mpi_statistics_scalar(samples, with_min_and_max=with_min_and_max)

            mean_label = key if average_only else 'Average' + key
            super().log_tabular(mean_label, stats[0])
            if not average_only:
                super().log_tabular('Std' + key, stats[1])
            if with_min_and_max:
                # stats[3] is logged as the max and stats[2] as the min.
                super().log_tabular('Max' + key, stats[3])
                super().log_tabular('Min' + key, stats[2])
        else:
            super().log_tabular(key, val)
        self.epoch_dict[key] = []

Пример #4

Показать файл

Файл: vpg.py Проект: michaelguan1992/spinningup-in-deeprl-tensorflow2

 def get(self):
     """
     Return all buffered data at the end of an epoch.

     The buffer must be full (``ptr == max_size``). The write pointer and
     the path start index are reset, and the advantage buffer is replaced
     by a normalized copy (mean subtracted, divided by std) before the
     buffers are returned.

     Returns:
         tuple: ``(obs_buf, act_buf, adv_buf, ret_buf, logp_buf)``.
     """
     # The buffer has to be full before its contents can be fetched.
     assert self.ptr == self.max_size
     self.ptr = 0
     self.path_start_idx = 0

     # Advantage normalization trick: shift by the mean, scale by the std.
     mean, std = mpi_statistics_scalar(self.adv_buf)
     centered = self.adv_buf - mean
     self.adv_buf = centered / std

     return (self.obs_buf, self.act_buf, self.adv_buf,
             self.ret_buf, self.logp_buf)

Пример #5

Показать файл

Файл: core.py Проект: dogeplusplus/ball-balancer

    def sample(self):
        """Fetch the full buffer contents as float32 tensors for training.

        The buffer must be full (``ptr == size``). The write pointer and
        path start index are reset, and the advantages are normalized with
        the mean and std from ``mpi_statistics_scalar``.

        Returns:
            Dictionary with keys ``obs``, ``act``, ``ret``, ``adv`` and
            ``logp``, each mapped to a ``torch.float32`` tensor.
        """
        assert self.ptr == self.size
        self.ptr = 0
        self.path_start_idx = 0

        mean, std = mpi_statistics_scalar(self.advantages)
        self.advantages = (self.advantages - mean) / std

        batch = {
            'obs': self.observations,
            'act': self.actions,
            'ret': self.returns,
            'adv': self.advantages,
            'logp': self.logp,
        }
        tensors = {}
        for name, values in batch.items():
            tensors[name] = torch.as_tensor(values, dtype=torch.float32)
        return tensors

Пример #6

Показать файл

 def get(self):
     """
     Return all buffered data at the end of an epoch as float32 tensors.

     The buffer must be full (``ptr == max_size``). The write pointer and
     the path start index are reset, and the advantage buffer is replaced
     by a normalized copy (mean subtracted, divided by std).

     Returns:
         dict: Keys ``obs``, ``act``, ``ret``, ``adv`` and ``logp``, each
         mapped to a ``torch.float32`` tensor.
     """
     # The buffer has to be full before its contents can be fetched.
     assert self.ptr == self.max_size
     self.ptr = 0
     self.path_start_idx = 0

     # Advantage normalization trick: shift by the mean, scale by the std.
     mean, std = mpi_statistics_scalar(self.adv_buf)
     self.adv_buf = (self.adv_buf - mean) / std

     names = ('obs', 'act', 'ret', 'adv', 'logp')
     buffers = (self.obs_buf, self.act_buf, self.ret_buf,
                self.adv_buf, self.logp_buf)
     return {
         name: torch.as_tensor(buf, dtype=torch.float32)
         for name, buf in zip(names, buffers)
     }

Пример #7

Показать файл

Файл: vpg.py Проект: michaelguan1992/spinningup-in-deeprl-tensorflow2

def vpg(env,
        ac_kwargs=None,
        seed=0,
        steps_per_epoch=4000,
        epochs=50,
        gamma=0.99,
        lam=0.97,
        max_ep_len=1000,
        save_freq=10):
    """Run a vanilla policy gradient training loop and plot the mean return.

    Each epoch collects ``int(steps_per_epoch / num_procs())`` environment
    steps in this process, hands the filled buffer to
    ``actor_critic.update``, and records the mean episode return reported by
    ``mpi_statistics_scalar``. After the last epoch, process 0 plots the mean
    return against the cumulative number of environment interactions.

    Args:
        env: Environment exposing ``reset()``, ``step(action)``,
            ``observation_space`` and ``action_space``. ``step`` must return
            a 4-tuple whose first three items are the next observation, the
            reward and the done flag.
        ac_kwargs (dict | None): Keyword arguments forwarded to
            ``ActorCritic``. ``None`` is treated as an empty dict. The
            mapping is copied before ``'action_space'`` is added, so the
            caller's object is never modified.
        seed (int): Base random seed; ``10000 * proc_id()`` is added to it
            before seeding TensorFlow and NumPy.
        steps_per_epoch (int): Environment steps per epoch, split evenly
            across processes.
        epochs (int): Number of collect/update cycles to run.
        gamma: Passed through to ``VPGBuffer``.
        lam: Passed through to ``VPGBuffer``.
        max_ep_len (int): An episode is treated as finished once it reaches
            this many steps.
        save_freq: Accepted for interface compatibility; not used by this
            function.

    Returns:
        None.
    """
    seed += 10000 * proc_id()
    tf.random.set_seed(seed)
    np.random.seed(seed)

    # The default ``ac_kwargs=None`` used to crash on the item assignment
    # below; normalize it, and copy so the caller's dict is not mutated.
    ac_kwargs = {} if ac_kwargs is None else dict(ac_kwargs)
    ac_kwargs['action_space'] = env.action_space

    # Create actor-critic agent
    actor_critic = ActorCritic(**ac_kwargs)

    # Experience buffer
    obs_dim = env.observation_space.shape
    act_dim = env.action_space.shape
    local_steps_per_epoch = int(steps_per_epoch / num_procs())
    buf = VPGBuffer(obs_dim, act_dim, local_steps_per_epoch, gamma, lam)

    # Main loop: collect experience in env and update/log each epoch

    # o for observation, r for reward, d for done
    o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0

    all_ep_ret = []
    summary_ep_ret = []
    total_env_interacts = []
    for epoch in range(epochs):
        for t in range(local_steps_per_epoch):
            a, logp_t, v_t = actor_critic(o.reshape(1, -1))

            # save and log
            a = a.numpy()[0]
            buf.store(o, a, r, v_t, logp_t)

            o, r, d, _ = env.step(a)
            ep_ret += r
            ep_len += 1

            terminal = d or (ep_len == max_ep_len)
            if terminal or (t == local_steps_per_epoch - 1):
                if not terminal and proc_id() == 0:
                    print('Warning: trajectory cut off by epoch at %d steps.' %
                          ep_len)
                # if trajectory didn't reach terminal state, bootstrap value target
                # NOTE(review): v_t was computed for the observation *before*
                # this step, not for the new ``o`` -- confirm this is the
                # intended bootstrap value.
                last_val = r if d else v_t
                buf.finish_path(last_val)

                if terminal:
                    all_ep_ret.append(ep_ret)
                # reset environment
                o, r, d, ep_ret, ep_len = env.reset(), 0, False, 0, 0

        # Perform VPG update!
        actor_critic.update(buf)
        # NOTE(review): all_ep_ret is empty if no episode finished during
        # this epoch; verify how mpi_statistics_scalar handles that case.
        mean, std = mpi_statistics_scalar(all_ep_ret)
        all_ep_ret = []
        if proc_id() == 0:
            print(f'epoch {epoch}: mean {mean}, std {std}')
        summary_ep_ret.append(mean)
        total_env_interacts.append((epoch + 1) * steps_per_epoch)

    # Only the root process draws the learning curve.
    if proc_id() == 0:
        plt.plot(total_env_interacts, summary_ep_ret)
        plt.grid(True)
        plt.show()