def __init__(self,
                 buffer_size,
                 alpha,
                 beta_zero,
                 beta_increment_size=0.001,
                 epsilon=0.1,
                 max_priority=1.,
                 seed=None):
        """Priority replay buffer initialiser.

        Args:
            buffer_size (int): capacity of the replay buffer.
            alpha (float): priority scaling hyperparameter.
            beta_zero (float): importance sampling scaling hyperparameter.
            beta_increment_size (float): beta annealing rate.
            epsilon (float): base priority to ensure non-zero sampling probability.
            max_priority (float): initial maximum priority.
            seed (int): seed for the random number generator.
        """
        random.seed(seed)

        self.sum_tree = SumTree(buffer_size)
        self.memory = {}
        self.experience = namedtuple(
            "experience", ["state", "action", "reward", "next_state", "done"])
        self.buffer_size = buffer_size
        self.beta_increment_size = beta_increment_size
        self.max_priority = max_priority**alpha
        self.min_priority = max_priority**alpha
        self.last_min_update = 0

        self.alpha = alpha
        self.beta = beta_zero
        self.epsilon = epsilon
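All of the snippets on this page lean on a SumTree: an array-backed complete binary tree whose leaves hold per-transition priorities and whose internal nodes hold the sum of their children, so the total is O(1) and prefix-sum sampling and priority updates are O(log n). Method names differ between examples (add/insert, total/get_total/total_sum/priority_total); the sketch below is one minimal version, using the add/update/get/total names most examples assume, and it reproduces the array layouts asserted by the tests in Examples #15 and #29:

import numpy as np


class SumTree:
    """Array-based sum tree: index 0 holds the grand total, leaves hold
    per-transition priorities (a minimal sketch, not any one library's version)."""

    def __init__(self, capacity):
        self.capacity = capacity
        self.tree = np.zeros(2 * capacity - 1)    # internal nodes + leaves
        self.data = np.zeros(capacity, dtype=object)
        self.write = 0                            # next leaf slot (circular)
        self.n_entries = 0

    def total(self):
        return self.tree[0]

    def add(self, p, data):
        idx = self.write + self.capacity - 1      # leaf index within the array
        self.data[self.write] = data
        self.update(idx, p)
        self.write = (self.write + 1) % self.capacity
        self.n_entries = min(self.n_entries + 1, self.capacity)

    def update(self, idx, p):
        change = p - self.tree[idx]
        self.tree[idx] = p
        while idx != 0:                           # propagate the change up to the root
            idx = (idx - 1) // 2
            self.tree[idx] += change

    def get(self, s):
        idx = 0
        while idx < self.capacity - 1:            # descend internal nodes to a leaf
            left = 2 * idx + 1
            if s <= self.tree[left]:
                idx = left
            else:
                s -= self.tree[left]
                idx = left + 1                    # right child
        return idx, self.tree[idx], self.data[idx - self.capacity + 1]

    def __len__(self):
        return self.n_entries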
Example #2
    def __init__(self, batch_size, max_size, beta):
        self.batch_size = batch_size  # mini-batch size
        self.max_size = 2**math.floor(
            math.log2(max_size))  # round down to a power of two so the sum tree is a complete binary tree
        self.beta = beta

        self._sum_tree = SumTree(self.max_size)  # use the rounded size, matching the comment above
Example #3

    def __init__(self, buffer_size, seed):
        """Initialize a ReplayBuffer object.

        Params
        ======
            seed (int): random seed
        """
        self.memory = SumTree(buffer_size)
        self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"])
        self.seed = random.seed(seed)

        # epsilon: small amount to avoid zero priority
        # alpha: [0~1] determines how much prioritization is used. with 0, we would get the uniform case
        # beta: Controls importance-sampling compensation. fully compensates for the non-uniform probabilities
        #   when beta=1. The unbiased nature of the updates is most important near convergence at the end of
        #   training, so we define a schedule on the exponent beta that starts from initial value and reaches 1
        #   only at the end of learning.

        self.epsilon = 0.01
        self.alpha = 0.6
        
        beta_start = 0.4
        self.beta_end = 1.0
        self.beta = beta_start
        beta_increments = 200
        self.beta_increment = (self.beta_end - beta_start)/beta_increments
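A sampling method would then anneal beta toward beta_end on each draw and clamp it; a one-line sketch of that step (the method itself is hypothetical, not part of this snippet):

        # inside a hypothetical sample() method of this buffer
        self.beta = min(self.beta_end, self.beta + self.beta_increment)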
Example #4
    def __init__(self, max_size, alpha, eps):
        self.max_size = max_size
        self.alpha = alpha
        self.eps = eps

        self.tree = SumTree(max_size)
        self.last_idxs = None
        self.size = 0
Example #5
    def __init__(self, capacity, batch_size):
        self.capacity = capacity
        self.batch_size = batch_size
        self.tree = SumTree(capacity=capacity)

        self.alpha = 0.6
        self.beta = 0.4
        self.p_epsilon = 1e-4
Example #6
 def __init__(self, memory_size):
     self.memory_size = memory_size
     self.memory = SumTree(memory_size)
     self.epsilon = 0.0001  # small amount to avoid zero priority
     self.alpha = 0.6  # adj_pri = pri^alpha
     self.beta = 0.4  # importance-sampling, from initial value increasing to 1
     self.beta_max = 1
     self.beta_increment_per_sampling = 0.001
     self.abs_err_upper = 1.  # clipped td error
Example #7
 def __init__(self, action_size, buffer_size, batch_size, alpha, epsilon):
     self.action_size = action_size
     self.tree = SumTree(buffer_size)
     self.batch_size = batch_size
     self.experience = namedtuple(
         "Experience",
         field_names=["state", "action", "reward", "next_state", "done"])
     self.alpha = alpha
     self.epsilon = epsilon
Example #8
 def __init__(self, capacity):
     """
     Instantiate a priority based memory with capable of holding
     capacity experiences. Memories are sampled with frequency
     based on their priority.
     """
     # Circular buffer array based tree with priorities as node values.
     self.tree = SumTree(capacity)
     self.e = 0.01 # Small constant to ensure all priorities > 0
     self.a = 0.6  # Constant to control the weight of error on priority
Example #9
 def __init__(self, e, a, beta, beta_increment_per_sampling, capacity,
              max_priority):
     self.capacity = capacity
     self.e = e
     self.a = a
     self.beta = beta
     self.beta_increment_per_sampling = beta_increment_per_sampling
     self.max_priority = max_priority
     self.sum_tree = SumTree(self.capacity)
Example #10
 def __init__(self):
     self.limit = MEMORY_CAPACITY
     self.err_tree = SumTree(MEMORY_CAPACITY)
     self.action_shape = (0, MEMORY_ACTION_CNT)
     self.reward_shape = (0, MEMORY_REWARD_CNT)
     self.terminal_shape = self.action_shape
     self.observation_shape = (0, MEMORY_CRITIC_FEATURE_NUM)
     self.store_times = 0
     self.Transition = namedtuple(
         'Transition',
         ('state', 'action', 'reward', 'next_state', 'terminal'))
Example #11
    def __init__(self, alpha, beta, beta_end, epsilon, num_steps, replay_size):

        self.alpha = alpha
        self.beta_start = beta
        self.beta_end = beta_end
        self.beta = beta
        self.epsilon = epsilon
        self.num_steps = num_steps

        self.memory = SumTree(replay_size)
        self.replay_size = replay_size
Example #12
 def __init__(self,
              tree_memory_length,
              error_multiplier=0.01,
              alpha=0.6,
              beta=0.4,
              beta_increment_per_sample=0.001):
     self.tree = SumTree(tree_memory_length)
     self.tree_memory_length = tree_memory_length
     self.error_multiplier = error_multiplier
     self.per_alpha = alpha
     self.per_beta_init = beta
     self.beta_increment_per_sample = beta_increment_per_sample
Example #13
 def __init__(self,
              capacity,
              alpha=0.6,
              beta=0.4,
              beta_anneal_step=0.001,
              epsilon=0.00000001):
     self.tree = SumTree(capacity)
     self.capacity = capacity
     self.a = alpha
     self.beta = beta
     self.beta_increment_per_sampling = beta_anneal_step
     self.e = epsilon
Example #14
 def __init__(self, host_name, db_name, collection_name, capacity):
     self.host_name = host_name
     self.db_name = db_name
     self.collection_name = collection_name
     self.capacity = capacity  # the original snippet read self.capacity without ever setting it
     self.client = MongoClient(host_name, 27017)
     self.db = self.client[db_name]
     self.replay_memory_collection = self.db[collection_name]
     self.sum_tree = SumTree(self.capacity)
     memory_priorities = self.replay_memory_collection.find({},
                                                            {"priority": 1})
     for memory_priority in memory_priorities:
         self.sum_tree.add(memory_priority["priority"],
                           {"_id": memory_priority["_id"]})
Example #15
    def test_len(self):
        instance = SumTree(4)

        instance.add(p=1, data=1)
        self.assertEqual(len(instance), 1)

        instance.add(p=2, data=2)
        self.assertEqual(len(instance), 2)

        instance.add(p=3, data=3)
        instance.add(p=4, data=4)
        instance.add(p=5, data=5)

        self.assertEqual(len(instance), 4)
Example #16
    def __init__(self, action_size, buffer_size, batch_size, seed):
        """Initialize a ReplayBuffer object.

        Params
        ======
            action_size (int): dimension of each action
            buffer_size (int): maximum size of buffer
            batch_size (int): size of each training batch
            seed (int): random seed
        """
        self.action_size = action_size
        self.memory = SumTree(buffer_size)
        self.batch_size = batch_size
        self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"])
        self.seed = random.seed(seed)
Example #17
    def __init__(self, params):

        buffer_size = params['buffer_size']
        batch_size = params['batch_size']
        mode = params['mode']

        self.__buffer_size = buffer_size
        self.__batch_size = batch_size
        self.__mode = mode

        self.__experience = namedtuple(
            "Experience",
            field_names=["state", "action", "reward", "next_state", "done"])
        self.__memory = SumTree(buffer_size)
        self.__memory_buffer = []
Example #18
    def __init__(self, device, memory_size, update_every=4, seed=0):
        """  Initializes the data structure

        :param device:  (torch.device) Object representing the device where to allocate tensors
        :param memory_size: (int) Maximum capacity of memory buffer
        :param update_every: (int) Number of steps between update operations
        :param seed:  (int) Seed used for PRNG
        """
        self.device = device
        self.probability_weights = SumTree(capacity=memory_size, seed=seed)
        self.elements = deque(maxlen=memory_size)
        self.update_every = update_every

        self.step = 0
        self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"])
Example #19
 def __init__(self,
              capacity,
              state_size=37,
              epsilon=0.001,
              alpha=0.4,
              beta=0.3,
              beta_increment_per_sampling=0.001,
              abs_err_upper=1):
     self.tree = SumTree(capacity)
     self.epsilon = epsilon  # avoid zero priority, which would give a transition no chance of ever being sampled
     self.alpha = alpha  # trade off priority vs. randomness: alpha = 0 is pure uniform sampling, alpha = 1 pure priority
     self.beta = beta  # importance-sampling exponent, annealed from a small value toward 1 to weight corrections more near the end of training
     self.beta_increment_per_sampling = beta_increment_per_sampling
     self.abs_err_upper = abs_err_upper  # clipped abs error
     self.state_size = state_size
Example #20
class PrioritizedReplayMemory:

    def __init__(self, capacity, alpha=0.6, eps=1e-2):
        self.tree = SumTree(capacity)
        self.alpha = alpha # alpha determines how much prioritization is used
        self.eps = eps # epsilon smooths priority, priority = (TD_error + eps) ** alpha

    def _get_priority(self, td_error):
        return (td_error + self.eps) ** self.alpha

    def current_length(self):
        return self.tree.current_length()

    def total_sum(self):
        return self.tree.total_sum()

    def push(self, event, td_error):
        priority = self._get_priority(td_error)
        self.tree.insert(event, priority)

    def sample(self, batch_sz):
        batch = []
        indices = []
        priorities = []
        segment = self.tree.total_sum() / batch_sz

        for i in range(batch_sz):
            l = segment * i
            r = segment * (i + 1)

            s = random.uniform(l, r)
            (idx, priority, data) = self.tree.get(s)
            batch.append(data)
            indices.append(idx)
            priorities.append(priority)

        samples = map(np.array, zip(*batch))

        return samples, indices, priorities

    def update(self, idx, td_error):
        if isinstance(idx, list):
            for i in range(len(idx)):
                priority = self._get_priority(td_error[i])
                self.tree.update(idx[i], priority)
        else:
            priority = self._get_priority(td_error)
            self.tree.update(idx, priority)
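A brief usage sketch for the class above. The transition fields and new_td_errors are illustrative placeholders; push/sample/update are the methods defined above:

# illustrative placeholders: state, action, reward, next_state, done, new_td_errors
memory = PrioritizedReplayMemory(capacity=2**16)

# new transitions enter with their current TD error as the priority seed
memory.push((state, action, reward, next_state, done), td_error=1.0)

# stratified sample of 32 transitions, then refresh their priorities
samples, indices, priorities = memory.sample(batch_sz=32)
memory.update(indices, new_td_errors)  # the list branch of update()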
Example #21
class MemoryDB:  # stored as ( s, a, r, s_ ) in SumTree
    def __init__(self, e, a, beta, beta_increment_per_sampling, capacity,
                 max_priority):
        self.capacity = capacity
        self.e = e
        self.a = a
        self.beta = beta
        self.beta_increment_per_sampling = beta_increment_per_sampling
        self.max_priority = max_priority
        self.sum_tree = SumTree(self.capacity)

    def _get_priority(self, error):
        return min((self.max_priority, (error + self.e)**self.a))

    def add(self, experience, error=None):
        p = self._get_priority(error) if error is not None else self.max_priority
        self.sum_tree.add(p, experience)

    def add_batch(self, experiences):
        for experience in experiences:
            self.add(experience, self.max_priority)

    def update(self, index, error, experience):
        p = self._get_priority(error)
        self.sum_tree.update(index, p)

    def update_batch(self, indexes, errors, experiences):
        for index, error, experience in zip(indexes, errors, experiences):
            self.update(index, error, experience)

    def get_experiences_size(self):
        return self.sum_tree.getCount()

    def sample(self, n):

        batch = []
        idxs = []
        segment = self.sum_tree.total() / n
        priorities = []

        self.beta = np.min([1., self.beta + self.beta_increment_per_sampling])

        for i in range(n):
            a = segment * i
            b = segment * (i + 1)
            s = random.uniform(a, b)
            (idx, p, data) = self.sum_tree.get(s)
            priorities.append(p)
            batch.append(data)
            idxs.append(idx)

        sampling_probabilities = np.array(priorities) / self.sum_tree.total()
        is_weight = np.power(self.sum_tree.n_entries * sampling_probabilities,
                             -self.beta)
        is_weight /= is_weight.max()
        return batch, idxs, is_weight
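The last four lines of sample implement the importance-sampling correction from the prioritized experience replay paper: w_i = (N * P(i))^(-beta), max-normalized so the weights only ever scale updates down. A toy check with numpy:

import numpy as np

priorities = np.array([0.5, 0.3, 0.2])   # priorities of three sampled leaves
probs = priorities / priorities.sum()    # P(i)
w = (3 * probs) ** -0.4                  # N = n_entries = 3, beta = 0.4
w /= w.max()                             # the rarest transition gets weight 1.0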
Example #22
class Memory(object):
    def __init__(self, batch_size, max_size, beta):
        self.batch_size = batch_size  # mini-batch size
        self.max_size = 2**math.floor(math.log2(max_size))  # round down to a power of two so the sum tree is a complete binary tree
        self.beta = beta

        self._sum_tree = SumTree(self.max_size)  # use the rounded size, matching the comment above

    def store_transition(self, s, a, r, s_, done):
        self._sum_tree.add((s, a, r, s_, done))

    def get_mini_batches(self):
        n_sample = self.batch_size if self._sum_tree.size >= self.batch_size else self._sum_tree.size
        total = self._sum_tree.get_total()

        step = total // n_sample
        points_transitions_probs = []
        for i in range(n_sample):
            v = np.random.uniform(i * step, (i + 1) * step - 1)
            t = self._sum_tree.sample(v)
            points_transitions_probs.append(t)

        points, transitions, probs = zip(*points_transitions_probs)

        # compute the importance-sampling ratios
        max_importance_ratio = (n_sample * self._sum_tree.get_min())**-self.beta
        importance_ratio = [(n_sample * probs[i])**-self.beta / max_importance_ratio
                            for i in range(len(probs))]

        return points, tuple(np.array(e) for e in zip(*transitions)), importance_ratio

    def update(self, points, td_error):
        for i in range(len(points)):
            self._sum_tree.update(points[i], td_error[i])
Example #23
class PrioritisedMemory(object):
    def __init__(self, alpha, beta, beta_end, epsilon, num_steps, replay_size):

        self.alpha = alpha
        self.beta_start = beta
        self.beta_end = beta_end
        self.beta = beta
        self.epsilon = epsilon
        self.num_steps = num_steps

        self.memory = SumTree(replay_size)
        self.replay_size = replay_size

    def proportional_priority(self, td_error):

        return (np.abs(td_error) + self.epsilon)**self.alpha

    def add_memory(self, td_error, data):

        priority = self.proportional_priority(td_error)

        self.memory.add_memory(data, priority)

        self.beta = np.min([
            1.0, self.beta + (self.beta_end - self.beta_start) / self.num_steps
        ])

    def update_priority(self, index, td_error):

        new_priority = self.proportional_priority(td_error)
        self.memory.update_priority(index, new_priority)

    def minibatch_sample(self, minibatch_size):

        samples = []
        priorities = []
        priority_indexes = []

        interval = self.memory.priority_total() / minibatch_size

        for i in range(minibatch_size):

            sample = np.random.uniform(i * interval, (i + 1) * interval)

            priority_index, priority, data = self.memory.get(sample)

            samples.append(data)

            priorities.append(priority)

            priority_indexes.append(priority_index)

        sampling_probabilities = np.array(priorities) / self.memory.priority_total()
        importance_weights = np.power(
            self.replay_size * sampling_probabilities, -self.beta)
        importance_weights /= np.max(importance_weights)

        return priority_indexes, samples, importance_weights
Example #24
class Memory:  # stored as ( s, a, r, s_ ) in SumTree
    def __init__(self,
                 capacity,
                 alpha=0.6,
                 beta=0.4,
                 beta_anneal_step=0.001,
                 epsilon=0.00000001):
        tree_capacity = 1
        while tree_capacity < capacity:  # round the capacity up to a power of two
            tree_capacity *= 2
        self.tree = SumTree(tree_capacity)
        self.capacity = capacity
        self.a = alpha
        self.beta = beta
        self.beta_increment_per_sampling = beta_anneal_step
        self.e = epsilon

    def _get_priority(self, error):
        # Direct proportional prioritization
        return (np.abs(error) + self.e)**self.a

    def add(self, error, sample):
        p = self._get_priority(error)
        self.tree.add(p, sample)

    def sample(self, n):
        batch = []
        idxs = []
        segment = self.tree.total() / n
        priorities = []

        for i in range(n):
            a = segment * i
            b = segment * (i + 1)
            data = 0

            while data == 0:
                s = random.uniform(a, b)
                (idx, p, data) = self.tree.get(s)

            priorities.append(p)
            batch.append(data)
            idxs.append(idx)

        sampling_probabilities = np.array(priorities) / self.tree.total()
        is_weight = np.power(self.tree.n_entries * sampling_probabilities,
                             -self.beta)
        is_weight /= is_weight.max()

        return batch, idxs, is_weight

    def step(self):
        self.beta = np.min(
            [1. - self.e, self.beta + self.beta_increment_per_sampling])

    def update(self, idx, error):
        p = self._get_priority(error)
        self.tree.update(idx, p)
Example #25
 def __init__(self,
              observation_len: int,
              action_len: int,
              reward_len: int,
              capacity: int,
              alpha: float = 0.6):
     super(PriorityBuffer, self).__init__(observation_len, action_len,
                                          reward_len, capacity)
     self.sum_tree = SumTree(capacity)
     self.max_priority = alpha
     self.min_priority = alpha
     self.alpha = alpha
Example #26
 def load(self, lst_serializable):
     """
     Load pickable representation of Replay Buffer. Inverse function of serializable
     """
     super().load(lst_serializable[0])
     self.max_priority = lst_serializable[1][0]
     self.min_priority = lst_serializable[1][1]
     self.alpha = lst_serializable[1][2]
     capacity = lst_serializable[1][3]
     tree_index = range(capacity)
     self.sum_tree = SumTree(capacity)
     self.sum_tree.update_values(tree_index, lst_serializable[1][4])
Example #27
class Memory:
    e = 0.01
    a = 0.6

    def __init__(self, capacity):
        self.tree = SumTree(capacity)
        self.capacity = capacity

    def _getPriority(self, error):
        return (error + self.e)**self.a

    def add(self, error, sample):
        p = self._getPriority(error)
        self.tree.add(p, sample)

    def sample(self, n):
        batch = []
        segment = self.tree.total() / n

        for i in range(n):
            a = segment * i
            b = segment * (i + 1)

            s = random.uniform(a, b)
            (idx, p, data) = self.tree.get(s)
            batch.append((idx, data))
        return batch

    def update(self, idx, error):
        p = self._getPriority(error)
        self.tree.update(idx, p)
Example #28
class PrioritizedReplayBuffer(ReplayBuffer):
    def __init__(self, buffer_size, alpha):
        self.capacity = buffer_size
        self.tree = SumTree(buffer_size)
        self.alpha = alpha
        self.max_priority = 1
        #self.beta_initial = ??
        #self.beta_steps = ??

    def add(self, experience):
        self.tree.add(self.max_priority, experience)

    def update(self, index, experience, td_error):
        priority = (abs(td_error) + 0.0001)**self.alpha
        self.tree.update(index, priority)
        if self.max_priority < priority:
            self.max_priority = priority

    def sample(self, batch_size):
        indexes = []
        batches = []
        total = self.tree.total()
        section = total / batch_size
        for i in range(batch_size):
            r = section * i + np.random.random() * section
            (idx, priority, experience) = self.tree.get(r)
            indexes.append(idx)  # kept for the later priority update
            batches.append(experience)
        return (indexes, batches)
Example #29
    def test_add(self):
        instance = SumTree(4)

        instance.add(p=1, data=1)
        np.testing.assert_array_equal([1, 1, 0, 1, 0, 0, 0], instance.tree)

        instance.add(p=2, data=2)
        np.testing.assert_array_equal([3, 3, 0, 1, 2, 0, 0], instance.tree)
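These two assertions pin down the array layout used throughout this page: with capacity 4 the tree occupies 2*4 - 1 = 7 slots, index 0 is the running total, indices 1-2 are the internal sums, and indices 3-6 are the leaf priorities. The minimal sketch at the top of this page reproduces exactly these arrays.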
Example #30
    def __init__(
        self,
        buffer_size,
        batch_size,
        seed,
        beta_start=0.4,
        delta_beta=1e-5,
        alpha=0.6,
        eps=1e-8,
    ):
        """Initialize PER.

        Args:
            buffer_size (int): Size of replay buffer. The actual size will be the
                first power of 2 greater than buffer_size.
            batch_size (int): Size of batches to draw.
            seed (float): Seed.
            beta_start (float): Initial value for beta (importance sampling exponent)
            delta_beta (float): Beta increment at each time step.
            alpha (float): Priority exponent.
            eps (float): Small positive number so zero-priority examples can still be sampled.
        """
        # Depth of sum tree
        depth = int(math.log2(buffer_size)) + 1
        super(PrioritizeReplayBuffer, self).__init__(2**depth, batch_size,
                                                     seed)

        # Initialize sum tree to keep track of the sum of priorities
        self.priorities = SumTree(depth)

        # Current max priority
        self.max_p = 1.0

        # PER Parameters
        self.alpha = alpha
        self.eps = eps
        self.beta = beta_start
        self.delta_beta = delta_beta
Example #31
class PERMemory:
    EPSILON = 0.0001
    ALPHA = 0.5
    BETA = 0.4
    size = 0

    def __init__(self, config, capacity):
        self.config = config
        self.capacity = capacity
        self.tree = SumTree(capacity)

    def _getPriority(self, td_error):
        return (td_error + self.EPSILON) ** self.ALPHA

    def push(self, transition):
        self.size += 1

        priority = self.tree.max()
        if priority <= 0:
            priority = 1

        self.tree.add(priority, transition)

    def sample(self, size, episode):
        batch = []
        indexes = []
        weights = np.empty(size, dtype='float32')
        total = self.tree.total()
        beta = self.BETA + (1 - self.BETA) * episode / self.config.num_episodes
        beta = min(1.0, beta)

        for i, rand in enumerate(np.random.uniform(0, total, size)):
            (idx, priority, data) = self.tree.get(rand)
            batch.append(data)
            indexes.append(idx)
            weights[i] = (self.capacity * priority / total) ** (-beta)

        return (indexes, batch, weights / weights.max())

    def update(self, idx, td_error):
        priority = self._getPriority(td_error)
        self.tree.update(idx, priority)

    def __len__(self):
        return self.size
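Unlike the per-sample beta_increment_per_sampling used elsewhere on this page, this class anneals beta as a function of training progress via the episode index. A quick check of the schedule, with num_episodes = 1000 as an illustrative value:

for episode in (0, 500, 1000):
    beta = min(1.0, 0.4 + (1 - 0.4) * episode / 1000)
    print(episode, beta)  # 0 -> 0.4, 500 -> 0.7, 1000 -> 1.0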
Example #32
 def __init__(self, config, capacity):
     self.config = config
     self.capacity = capacity
     self.tree = SumTree(capacity)