def test_append(self):
    """Appending beyond max_length should evict the oldest entries (FIFO)."""
    count = 100
    start_length = count // 2
    max_length = count
    buffer = ReplayBuffer(start_length=start_length, max_length=max_length)
    for append_count in range(max_length * 2):
        buffer.append(append_count)
        self.assertEqual(len(buffer.buffer), min(append_count + 1, max_length),
                         "Incorrect buffer size.")
        self.assertEqual(buffer.buffer[0], max(0, (append_count + 1) - max_length),
                         "Incorrect first value.")
        self.assertEqual(buffer.buffer[-1], append_count,
                         "Incorrect last value.")
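The assertions above imply a FIFO buffer that evicts its oldest entries once max_length is reached. The following is a hypothetical minimal ReplayBuffer consistent with this test; the real implementation, and the role of start_length (which the test does not exercise), may differ.

from collections import deque


class ReplayBuffer:
    """Hypothetical minimal buffer satisfying the assertions in test_append."""

    def __init__(self, start_length=None, max_length=10000):
        # start_length would normally gate when sampling may begin;
        # it is not checked by test_append
        self.start_length = start_length if start_length is not None else max_length
        # deque(maxlen=...) drops the oldest element once full (FIFO)
        self.buffer = deque(maxlen=max_length)

    def append(self, item):
        self.buffer.append(item)

    def __len__(self):
        return len(self.buffer)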
Example #2
# Standard imports needed by this example; ActorNetwork, CriticNetwork,
# OUActionNoise, ReplayBuffer, update_target and the hyperparameter constants
# (GAMMA, RHO, STD_DEV, BUFFER_SIZE, BATCH_SIZE, CRITIC_LR, ACTOR_LR) are
# assumed to be provided by the surrounding project.
import os
import logging

import numpy as np
import tensorflow as tf


class Brain:
    """
    The Brain that contains all the models: the actor and critic networks,
    their target copies, the replay buffer, exploration noise and optimizers
    """
    def __init__(self,
                 num_states,
                 num_actions,
                 action_high,
                 action_low,
                 gamma=GAMMA,
                 rho=RHO,
                 std_dev=STD_DEV):
        # initialize everything
        self.actor_network = ActorNetwork(num_states, num_actions, action_high)
        self.critic_network = CriticNetwork(num_states, num_actions,
                                            action_high)
        self.actor_target = ActorNetwork(num_states, num_actions, action_high)
        self.critic_target = CriticNetwork(num_states, num_actions,
                                           action_high)

        # Making the weights equal initially
        self.actor_target.set_weights(self.actor_network.get_weights())
        self.critic_target.set_weights(self.critic_network.get_weights())

        self.buffer = ReplayBuffer(BUFFER_SIZE, BATCH_SIZE)
        self.gamma = tf.constant(gamma)
        self.rho = rho
        self.action_high = action_high
        self.action_low = action_low
        self.num_states = num_states
        self.num_actions = num_actions
        self.noise = OUActionNoise(mean=np.zeros(1),
                                   std_deviation=float(std_dev) * np.ones(1))

        # optimizers
        self.critic_optimizer = tf.keras.optimizers.Adam(CRITIC_LR,
                                                         amsgrad=True)
        self.actor_optimizer = tf.keras.optimizers.Adam(ACTOR_LR, amsgrad=True)

        # last action taken; remember() stores it together with the transition
        self.cur_action = None

        # define update weights with tf.function for improved performance
        @tf.function(input_signature=[
            tf.TensorSpec(shape=(None, num_states), dtype=tf.float32),
            tf.TensorSpec(shape=(None, num_actions), dtype=tf.float32),
            tf.TensorSpec(shape=(None, 1), dtype=tf.float32),
            tf.TensorSpec(shape=(None, num_states), dtype=tf.float32),
            tf.TensorSpec(shape=(None, 1), dtype=tf.float32),
        ])
        def update_weights(s, a, r, sn, d):
            """
            Function to update weights with optimizer
            """
            with tf.GradientTape() as tape:
                # TD target: y = r + gamma * (1 - done) * Q_target(s', mu_target(s'))
                y = r + self.gamma * (1 - d) * self.critic_target(
                    [sn, self.actor_target(sn)])
                # critic loss: mean absolute TD error
                critic_loss = tf.math.reduce_mean(
                    tf.math.abs(y - self.critic_network([s, a])))
            critic_grad = tape.gradient(
                critic_loss, self.critic_network.trainable_variables)
            self.critic_optimizer.apply_gradients(
                zip(critic_grad, self.critic_network.trainable_variables))

            with tf.GradientTape() as tape:
                # actor loss: negative mean critic value of the actor's actions
                actor_loss = -tf.math.reduce_mean(
                    self.critic_network([s, self.actor_network(s)]))
            actor_grad = tape.gradient(actor_loss,
                                       self.actor_network.trainable_variables)
            self.actor_optimizer.apply_gradients(
                zip(actor_grad, self.actor_network.trainable_variables))
            return critic_loss, actor_loss

        self.update_weights = update_weights

    def act(self, state, _notrandom=True, noise=True):
        """
        Run action by the actor network

        Args:
            state: the current state (batched tensor of shape (1, num_states))
            _notrandom: if True, use the actor's greedy action; otherwise
                sample uniformly from the action space
            noise: whether exploration noise is added to the resulting action

        Returns:
            the resulting action
        """
        # greedy action from the actor or a uniformly random action;
        # exploration noise, if enabled, is added to either choice
        base_action = (self.actor_network(state)[0].numpy() if _notrandom else
                       np.random.uniform(self.action_low, self.action_high,
                                         self.num_actions))
        self.cur_action = base_action + (self.noise() if noise else 0)
        self.cur_action = np.clip(self.cur_action, self.action_low,
                                  self.action_high)

        return self.cur_action

    def remember(self, prev_state, reward, state, done):
        """
        Store the transition (previous state, last action, reward, next state, done) in the buffer
        """
        # record it in the buffer based on its reward
        self.buffer.append(prev_state, self.cur_action, reward, state, done)

    def learn(self, entry):
        """
        Run one training update for all networks

        Args:
            entry: a batch of (state, action, reward, next_state, done) transitions
        """
        s, a, r, sn, d = zip(*entry)

        c_l, a_l = self.update_weights(
            tf.convert_to_tensor(s, dtype=tf.float32),
            tf.convert_to_tensor(a, dtype=tf.float32),
            tf.convert_to_tensor(r, dtype=tf.float32),
            tf.convert_to_tensor(sn, dtype=tf.float32),
            tf.convert_to_tensor(d, dtype=tf.float32))

        update_target(self.actor_target, self.actor_network, self.rho)
        update_target(self.critic_target, self.critic_network, self.rho)

        return c_l, a_l

    def save_weights(self, path):
        """
        Save weights to `path`
        """
        parent_dir = os.path.dirname(path)
        if parent_dir and not os.path.exists(parent_dir):
            os.makedirs(parent_dir)
        # Save the weights
        self.actor_network.save_weights(path + "an.h5")
        self.critic_network.save_weights(path + "cn.h5")
        self.critic_target.save_weights(path + "ct.h5")
        self.actor_target.save_weights(path + "at.h5")

    def load_weights(self, path):
        """
        Load weights from path
        """
        try:
            self.actor_network.load_weights(path + "an.h5")
            self.critic_network.load_weights(path + "cn.h5")
            self.critic_target.load_weights(path + "ct.h5")
            self.actor_target.load_weights(path + "at.h5")
        except OSError as err:
            logging.warning("Weights files cannot be found, %s", err)
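A hypothetical usage sketch of the Brain class above. The DummyEnv, the state/action sizes and the way the training batch is assembled are illustrative assumptions only; the project's ActorNetwork, CriticNetwork, OUActionNoise, ReplayBuffer, update_target and hyperparameter constants still need to be importable for this to run.

import numpy as np
import tensorflow as tf


class DummyEnv:
    """Toy 3-state / 1-action environment used only to exercise the API."""

    def reset(self):
        return np.zeros(3, dtype=np.float32)

    def step(self, action):
        next_state = np.random.randn(3).astype(np.float32)
        reward = -float(np.sum(np.square(action)))
        done = bool(np.random.rand() < 0.05)
        return next_state, reward, done


env = DummyEnv()
brain = Brain(num_states=3, num_actions=1, action_high=2.0, action_low=-2.0)

transitions = []
prev_state = env.reset()
for _ in range(64):
    # act() expects a batched state of shape (1, num_states)
    tf_state = tf.expand_dims(tf.convert_to_tensor(prev_state), 0)
    action = brain.act(tf_state, _notrandom=True, noise=True)
    state, reward, done = env.step(action)
    brain.remember(prev_state, reward, state, done)
    # learn() takes an iterable of (state, action, reward, next_state, done)
    transitions.append((prev_state, action,
                        np.array([reward], dtype=np.float32),
                        state,
                        np.array([float(done)], dtype=np.float32)))
    prev_state = env.reset() if done else state

critic_loss, actor_loss = brain.learn(transitions)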