def rollout(self, batch_info, model) -> Rollout:
    """ Roll out the environment for a single step and return the transition """
    observation_tensor = torch.from_numpy(self.last_observation).to(self.device)

    step = model.step(observation_tensor[None])
    action = step['actions'].detach().cpu().numpy()[0]
    noise = self.noise_process()

    action_perturbed = np.clip(
        action + noise,
        self.environment.action_space.low,
        self.environment.action_space.high
    )

    observation, reward, done, info = self.environment.step(action_perturbed)

    if self.ob_rms is not None:
        self.ob_rms.update(observation)

    self.backend.store_transition(self.last_observation, action_perturbed, reward, done)

    # As usual, reset the environment on done
    if done:
        observation = self.environment.reset()
        self.noise_process.reset()

    self.last_observation = observation

    return Transitions(
        size=1,
        environment_information=[info],
        transition_tensors={
            'actions': step['actions'],
            'values': step['values']
        },
    )
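
# The exploration noise above comes from self.noise_process, whose implementation is not
# shown here. A minimal sketch of one possible process, assuming Ornstein-Uhlenbeck style
# temporally correlated noise (the class name and parameters below are illustrative, not
# taken from this code base):
import numpy as np


class OrnsteinUhlenbeckNoiseSketch:
    """ Temporally correlated noise for continuous-action exploration """

    def __init__(self, shape, mu=0.0, sigma=0.2, theta=0.15, dt=1e-2):
        self.shape = shape
        self.mu = mu
        self.sigma = sigma
        self.theta = theta
        self.dt = dt
        self.state = np.full(shape, mu, dtype=np.float32)

    def __call__(self):
        # dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
        self.state = self.state + self.theta * (self.mu - self.state) * self.dt \
            + self.sigma * np.sqrt(self.dt) * np.random.standard_normal(self.shape)
        return self.state

    def reset(self):
        # Restart the process from the mean, e.g. at episode boundaries
        self.state = np.full(self.shape, self.mu, dtype=np.float32)
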
def sample(self, batch_info, model) -> Transitions:
    """ Sample experience from replay buffer and return a batch """
    indexes = self.backend.sample_batch_uniform(self.batch_size, self.frame_stack)
    batch = self.backend.get_batch(indexes, self.frame_stack)

    observations = torch.from_numpy(batch['states']).to(self.device)
    observations_plus1 = torch.from_numpy(batch['states+1']).to(self.device)
    dones = torch.from_numpy(batch['dones'].astype(np.float32)).to(self.device)
    rewards = torch.from_numpy(batch['rewards'].astype(np.float32)).to(self.device)
    actions = torch.from_numpy(batch['actions']).to(self.device)

    return Transitions(
        size=self.batch_size,
        environment_information=None,
        transition_tensors={
            'observations': observations,
            'observations_next': observations_plus1,
            'dones': dones,
            'rewards': rewards,
            'actions': actions,
            'weights': torch.ones_like(rewards)
        }
    )
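
# sample_batch_uniform draws indices from the replay backend; its internals are not shown.
# A rough sketch of what uniform index sampling from a circular buffer could look like,
# assuming the buffer exposes its current size and write pointer (illustrative only,
# wrap-around handling omitted):
import numpy as np


def sample_batch_uniform_sketch(current_size, write_idx, batch_size, history_length):
    """ Draw batch_size indices, skipping ones too close to the write pointer
        so that a full frame stack and a valid next state exist """
    indices = []
    while len(indices) < batch_size:
        candidate = np.random.randint(history_length, current_size)
        # Reject indices whose frame stack would straddle the write pointer
        if not (write_idx <= candidate < write_idx + history_length):
            indices.append(candidate)
    return np.array(indices)
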
def rollout(self, batch_info, model) -> Rollout:
    """ Roll out the environment for a single step and return the transition """
    epsilon_value = self.epsilon_schedule.value(batch_info['progress'])
    batch_info['epsilon'] = epsilon_value

    last_observation = np.concatenate([
        self.backend.get_frame(self.backend.current_idx, self.frame_stack - 1),
        self.last_observation
    ], axis=-1)

    observation_tensor = torch.from_numpy(last_observation[None]).to(self.device)

    step = model.step(observation_tensor)
    epsgreedy_step = self.epsgreedy_action(step['actions'], epsilon_value)
    action = epsgreedy_step.item()

    observation, reward, done, info = self.environment.step(action)

    self.backend.store_transition(self.last_observation, action, reward, done)

    # As usual, reset the environment on done
    if done:
        observation = self.environment.reset()

    self.last_observation = observation

    return Transitions(
        size=1,
        environment_information=[info],
        transition_tensors={
            'actions': epsgreedy_step.unsqueeze(0),
            'values': step['values']
        },
        extra_data={'epsilon': epsilon_value}
    )
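
# epsgreedy_action is not shown above, and the exact model interface is an assumption.
# A minimal sketch of epsilon-greedy selection, assuming the model exposes per-action
# Q-values of shape (batch, num_actions) (function name below is illustrative):
import torch


def epsgreedy_action_sketch(q_values: torch.Tensor, epsilon: float) -> torch.Tensor:
    """ With probability epsilon pick a uniformly random action, otherwise the greedy one """
    greedy = q_values.argmax(dim=-1)
    random_actions = torch.randint(q_values.size(-1), greedy.shape, device=q_values.device)
    take_random = torch.rand(greedy.shape, device=q_values.device) < epsilon
    return torch.where(take_random, random_actions, greedy)
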
def sample(self, batch_info, model) -> Transitions:
    """ Sample experience from replay buffer and return a batch """
    probs, indexes, tree_idxs = self.backend.sample_batch_prioritized(self.batch_size, self.frame_stack)
    batch = self.backend.get_batch(indexes, self.frame_stack)

    # Normalize the importance-sampling weights
    priority_weight = self.priority_weight_schedule.value(batch_info['progress'])

    probs = np.stack(probs) / self.backend.segment_tree.total()
    capacity = self.backend.deque.current_size
    weights = (capacity * probs) ** (-priority_weight)
    weights = weights / weights.max()

    observations = torch.from_numpy(batch['states']).to(self.device)
    observations_plus1 = torch.from_numpy(batch['states+1']).to(self.device)
    dones = torch.from_numpy(batch['dones'].astype(np.float32)).to(self.device)
    rewards = torch.from_numpy(batch['rewards'].astype(np.float32)).to(self.device)
    actions = torch.from_numpy(batch['actions']).to(self.device)
    weights = torch.from_numpy(weights.astype(np.float32)).to(self.device)

    return Transitions(
        size=self.batch_size,
        environment_information=None,
        transition_tensors={
            'observations': observations,
            'observations_next': observations_plus1,
            'dones': dones,
            'rewards': rewards,
            'actions': actions,
            'weights': weights,
        },
        extra_data={
            'tree_idxs': tree_idxs
        }
    )
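
# The weight computation above follows the prioritized-replay correction
# w_i = (N * P(i)) ** (-beta), normalized by max_i w_i so weights stay in (0, 1].
# A tiny self-contained check of the same arithmetic (the numbers are made up):
import numpy as np

priorities = np.array([0.5, 0.1, 0.4])    # hypothetical sampled priorities
probs = priorities / priorities.sum()     # P(i), as done above via segment_tree.total()
capacity = 3                              # current number of stored transitions
beta = 0.4                                # priority_weight at this point of training

weights = (capacity * probs) ** (-beta)
weights = weights / weights.max()         # the rarest transition gets weight 1.0
print(weights)                            # approximately [0.52, 1.0, 0.57]
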
def sample(self, batch_info, model) -> Transitions:
    """ Sample experience from replay buffer and return a batch """
    indexes = self.backend.sample_batch_uniform(self.batch_size, history_length=1)
    batch = self.backend.get_batch(indexes, history_length=1)

    observations = self._observation_list_to_tensor(batch['states'])
    observations_plus1 = self._observation_list_to_tensor(batch['states+1'])

    rewards = batch['rewards'].astype(np.float32)

    if self.ret_rms is not None:
        rewards = np.clip(rewards / np.sqrt(self.ret_rms.var + 1e-8), -self.clip_obs, self.clip_obs)

    dones = torch.from_numpy(batch['dones'].astype(np.float32)).to(self.device)
    rewards = torch.from_numpy(rewards).to(self.device)
    actions = torch.from_numpy(batch['actions']).to(self.device)

    return Transitions(
        size=self.batch_size,
        environment_information=[],
        transition_tensors={
            'observations': observations,
            'observations_next': observations_plus1,
            'dones': dones,
            'rewards': rewards,
            'actions': actions
        }
    )
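
# ret_rms above tracks running statistics of the discounted return; its class is not shown.
# A minimal sketch of such a tracker, assuming the usual running mean/variance update
# (the class below is illustrative, not the one used by this roller):
import numpy as np


class RunningMeanStdSketch:
    """ Track running mean and variance of a stream of values """

    def __init__(self, epsilon=1e-4):
        self.mean = 0.0
        self.var = 1.0
        self.count = epsilon

    def update(self, batch):
        batch = np.asarray(batch, dtype=np.float64)
        batch_mean = batch.mean()
        batch_var = batch.var()
        batch_count = batch.size

        delta = batch_mean - self.mean
        total = self.count + batch_count

        # Chan et al. parallel mean/variance combination
        new_mean = self.mean + delta * batch_count / total
        m_a = self.var * self.count
        m_b = batch_var * batch_count
        m2 = m_a + m_b + delta ** 2 * self.count * batch_count / total

        self.mean = new_mean
        self.var = m2 / total
        self.count = total
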
def rollout(self, batch_info, model) -> Rollout:
    """ Roll out the environment for a single step and return the transition """
    observation_tensor = self._observation_to_tensor(self.last_observation)

    step = model.step(observation_tensor)
    action = step['actions'].detach().cpu().numpy()[0]
    noise = self.noise_process()

    action_perturbed = np.clip(
        action + noise,
        self.environment.action_space.low,
        self.environment.action_space.high
    )

    observation, reward, done, info = self.environment.step(action_perturbed)

    if self.ob_rms is not None:
        self.ob_rms.update(observation)

    if self.ret_rms is not None:
        self.accumulated_return = reward + self.discount_factor * self.accumulated_return
        self.ret_rms.update(np.array([self.accumulated_return]))

    self.backend.store_transition(self.last_observation, action_perturbed, reward, done)

    # As usual, reset the environment on done
    if done:
        observation = self.environment.reset()
        self.noise_process.reset()
        self.accumulated_return = 0.0

    self.last_observation = observation

    return Transitions(
        size=1,
        environment_information=[info],
        transition_tensors={
            'actions': step['actions'],
            'values': step['values']
        },
    )
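
# The accumulated_return update above keeps a per-environment discounted return estimate
# that feeds ret_rms. A tiny illustration of how that accumulator evolves over a few steps
# (rewards and discount are made-up numbers):
discount_factor = 0.99
accumulated_return = 0.0
for reward in [1.0, 0.0, 2.0]:
    accumulated_return = reward + discount_factor * accumulated_return
    # accumulator takes the values 1.0, 0.99, 2.9801
print(accumulated_return)  # 2.9801
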
def sample(self, batch_info, model) -> Transitions:
    """ Sample experience from replay buffer and return a batch """
    indexes = self.backend.sample_batch_uniform(self.batch_size, history_length=1)
    batch = self.backend.get_batch(indexes, history_length=1)

    observations = torch.from_numpy(self._filter_observation(batch['states'])).to(self.device)
    observations_plus1 = torch.from_numpy(self._filter_observation(batch['states+1'])).to(self.device)
    dones = torch.from_numpy(batch['dones'].astype(np.float32)).to(self.device)
    rewards = torch.from_numpy(batch['rewards'].astype(np.float32)).to(self.device)
    actions = torch.from_numpy(batch['actions']).to(self.device)

    return Transitions(
        size=self.batch_size,
        environment_information=[],
        transition_tensors={
            'observations': observations,
            'observations_next': observations_plus1,
            'dones': dones,
            'rewards': rewards,
            'actions': actions
        }
    )
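
# _filter_observation is not shown above. One plausible sketch, assuming the replay buffer
# stores dictionary-style observations and the model only consumes a single array from them
# (the function name and the key 'observation' are purely illustrative):
import numpy as np


def filter_observation_sketch(batched_obs, key='observation'):
    """ Pull one field out of a batch of dict observations and stack it into an array """
    return np.stack([obs[key] for obs in batched_obs])
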