Exemplo n.º 1
0
    def __init__(self, batch_size, max_size, beta):
        """Set up the buffer and its backing sum tree.

        Args:
            batch_size: Mini-batch size.
            max_size: Requested capacity; rounded down to the nearest power
                of two so that the sum tree is a complete binary tree.
            beta: Stored unchanged on ``self.beta``.
        """
        self.batch_size = batch_size  # mini-batch size
        # Round down to a power of two so the sum tree is a complete binary tree.
        self.max_size = 2**math.floor(math.log2(max_size))
        self.beta = beta

        # Build the tree from the rounded capacity; using the raw ``max_size``
        # would defeat the rounding done above.
        self._sum_tree = SumTree(self.max_size)
    def __init__(self, buffer_size, seed):
        """Initialize a ReplayBuffer object.

        Params
        ======
            buffer_size: value passed to the SumTree constructor
            seed (int): random seed
        """
        self.memory = SumTree(buffer_size)
        self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"])
        # random.seed() returns None, so seed the global RNG first and keep
        # the seed value itself on the instance.
        random.seed(seed)
        self.seed = seed

        # epsilon: small amount to avoid zero priority
        # alpha: [0~1] determines how much prioritization is used. with 0, we would get the uniform case
        # beta: Controls importance-sampling compensation. fully compensates for the non-uniform probabilities
        #   when beta=1. The unbiased nature of the updates is most important near convergence at the end of
        #   training, so we define a schedule on the exponent beta that starts from initial value and reaches 1
        #   only at the end of learning.

        self.epsilon = 0.01
        self.alpha = 0.6

        beta_start = 0.4
        self.beta_end = 1.0
        self.beta = beta_start
        # Step size that takes beta from beta_start to beta_end in 200 increments.
        beta_increments = 200
        self.beta_increment = (self.beta_end - beta_start) / beta_increments
    def __init__(self,
                 buffer_size,
                 alpha,
                 beta_zero,
                 beta_increment_size=0.001,
                 epsilon=0.1,
                 max_priority=1.,
                 seed=None):
        """Create an empty priority replay buffer.

        Args:
            buffer_size (int): capacity of the replay buffer.
            alpha (float): priority scaling hyperparameter.
            beta_zero (float): importance sampling scaling hyperparameter;
                used as the starting value of ``self.beta``.
            beta_increment_size (float): beta annealing rate.
            epsilon (float): base priority to ensure non-zero sampling probability.
            max_priority (float): initial maximum priority (before alpha scaling).
            seed (int): seed for the global ``random`` generator.
        """
        random.seed(seed)

        experience_fields = ["state", "action", "reward", "next_state", "done"]

        # Storage: sum tree plus a dict-based memory.
        self.sum_tree = SumTree(buffer_size)
        self.memory = {}
        self.experience = namedtuple("experience", experience_fields)
        self.buffer_size = buffer_size

        # Both priority bounds start from the same alpha-scaled value.
        scaled_priority = max_priority**alpha
        self.max_priority = scaled_priority
        self.min_priority = scaled_priority
        self.last_min_update = 0

        # Hyperparameters.
        self.alpha = alpha
        self.beta = beta_zero
        self.beta_increment_size = beta_increment_size
        self.epsilon = epsilon
Exemplo n.º 4
0
    def test_add(self):
        """Check the ``tree`` array of a ``SumTree(4)`` after one and two adds."""
        sum_tree = SumTree(4)

        # First insertion.
        sum_tree.add(p=1, data=1)
        expected_after_first = [1, 1, 0, 1, 0, 0, 0]
        np.testing.assert_array_equal(expected_after_first, sum_tree.tree)

        # Second insertion.
        sum_tree.add(p=2, data=2)
        expected_after_second = [3, 3, 0, 1, 2, 0, 0]
        np.testing.assert_array_equal(expected_after_second, sum_tree.tree)
Exemplo n.º 5
0
    def __init__(self, max_size, alpha, eps):
        """Create the buffer state and its backing SumTree.

        Args:
            max_size: Passed to ``SumTree`` and kept on ``self.max_size``.
            alpha: Stored unchanged on ``self.alpha``.
            eps: Stored unchanged on ``self.eps``.
        """
        # Caller-supplied settings.
        self.max_size, self.alpha, self.eps = max_size, alpha, eps

        # Bookkeeping starts out empty.
        self.size = 0
        self.last_idxs = None
        self.tree = SumTree(max_size)
Exemplo n.º 6
0
    def __init__(self, capacity, batch_size):
        """Create the memory and its backing SumTree.

        Args:
            capacity: Passed to ``SumTree(capacity=...)`` and kept on
                ``self.capacity``.
            batch_size: Stored on ``self.batch_size``.
        """
        self.capacity = capacity
        # The caller's batch size is kept as given; there is no hard-coded
        # override, so the argument is actually honoured.
        self.batch_size = batch_size
        self.tree = SumTree(capacity=capacity)

        # Fixed hyperparameters.
        self.alpha = 0.6
        self.beta = 0.4
        self.p_epsilon = 1e-4
Exemplo n.º 7
0
 def __init__(self, memory_size):
     """Create the memory with its fixed hyperparameters.

     Args:
         memory_size: Passed to ``SumTree`` and kept on ``self.memory_size``.
     """
     self.memory_size = memory_size
     self.memory = SumTree(memory_size)

     # Priority shaping.
     self.epsilon = 0.0001  # small offset so that no priority is zero
     self.alpha = 0.6  # exponent: adj_pri = pri^alpha
     self.abs_err_upper = 1.  # upper clip for the TD error

     # Importance-sampling exponent: starts here and increases towards 1.
     self.beta = 0.4
     self.beta_max = 1
     self.beta_increment_per_sampling = 0.001
Exemplo n.º 8
0
 def __init__(self, action_size, buffer_size, batch_size, alpha, epsilon):
     """Store the settings and create the SumTree and experience record type.

     Args:
         action_size: Stored unchanged on ``self.action_size``.
         buffer_size: Passed to the ``SumTree`` constructor.
         batch_size: Stored unchanged on ``self.batch_size``.
         alpha: Stored unchanged on ``self.alpha``.
         epsilon: Stored unchanged on ``self.epsilon``.
     """
     experience_fields = ["state", "action", "reward", "next_state", "done"]

     # Plain settings.
     self.action_size = action_size
     self.batch_size = batch_size
     self.alpha = alpha
     self.epsilon = epsilon

     # Storage and record type.
     self.tree = SumTree(buffer_size)
     self.experience = namedtuple("Experience", field_names=experience_fields)
Exemplo n.º 9
0
 def __init__(self, capacity):
     """
     Build a priority-based memory able to hold `capacity` experiences.
     How often a memory is sampled follows its priority.
     """
     self.a = 0.6   # controls the weight of the error on the priority
     self.e = 0.01  # small constant keeping every priority above zero
     # Priorities are the node values of this circular-buffer, array-based tree.
     self.tree = SumTree(capacity)
Exemplo n.º 10
0
 def __init__(self, e, a, beta, beta_increment_per_sampling, capacity,
              max_priority):
     """Store the hyperparameters and create the backing SumTree.

     Args:
         e: Stored unchanged on ``self.e``.
         a: Stored unchanged on ``self.a``.
         beta: Stored unchanged on ``self.beta``.
         beta_increment_per_sampling: Stored unchanged on
             ``self.beta_increment_per_sampling``.
         capacity: Kept on ``self.capacity`` and passed to ``SumTree``.
         max_priority: Stored unchanged on ``self.max_priority``.
     """
     self.e = e
     self.a = a
     self.beta = beta
     self.beta_increment_per_sampling = beta_increment_per_sampling
     # Assigned once; a second identical assignment would be redundant.
     self.capacity = capacity
     self.max_priority = max_priority
     self.sum_tree = SumTree(self.capacity)
Exemplo n.º 11
0
 def __init__(self):
     """Set up the memory from the module-level ``MEMORY_*`` constants."""
     transition_fields = ('state', 'action', 'reward', 'next_state',
                          'terminal')

     self.limit = MEMORY_CAPACITY
     self.err_tree = SumTree(MEMORY_CAPACITY)
     self.store_times = 0

     # Shapes all start with a leading 0; terminal reuses the action shape.
     self.action_shape = (0, MEMORY_ACTION_CNT)
     self.terminal_shape = self.action_shape
     self.reward_shape = (0, MEMORY_REWARD_CNT)
     self.observation_shape = (0, MEMORY_CRITIC_FEATURE_NUM)

     self.Transition = namedtuple('Transition', transition_fields)
Exemplo n.º 12
0
    def __init__(self, alpha, beta, beta_end, epsilon, num_steps, replay_size):
        """Store the hyperparameters and create the backing SumTree.

        Args:
            alpha: Stored unchanged on ``self.alpha``.
            beta: Initial beta; kept on both ``self.beta`` and
                ``self.beta_start``.
            beta_end: Stored unchanged on ``self.beta_end``.
            epsilon: Stored unchanged on ``self.epsilon``.
            num_steps: Stored unchanged on ``self.num_steps``.
            replay_size: Passed to ``SumTree`` and kept on
                ``self.replay_size``.
        """
        # Storage.
        self.replay_size = replay_size
        self.memory = SumTree(replay_size)

        # Beta starts at the supplied value; beta_start remembers it.
        self.beta = beta
        self.beta_start = beta
        self.beta_end = beta_end

        # Remaining settings.
        self.alpha = alpha
        self.epsilon = epsilon
        self.num_steps = num_steps
Exemplo n.º 13
0
 def __init__(self,
              observation_len: int,
              action_len: int,
              reward_len: int,
              capacity: int,
              alpha: float = 0.6):
     """Initialise the parent buffer and add the priority bookkeeping.

     Args:
         observation_len: Forwarded to the parent initialiser.
         action_len: Forwarded to the parent initialiser.
         reward_len: Forwarded to the parent initialiser.
         capacity: Forwarded to the parent initialiser and to ``SumTree``.
         alpha: Stored on ``self.alpha``; also used as the starting value
             of ``self.max_priority`` and ``self.min_priority``. The
             default is a float, hence the ``float`` annotation.
     """
     super(PriorityBuffer, self).__init__(observation_len, action_len,
                                          reward_len, capacity)
     self.sum_tree = SumTree(capacity)
     # NOTE(review): both priority bounds are seeded with alpha itself --
     # confirm this is the intended initial priority.
     self.max_priority = alpha
     self.min_priority = alpha
     self.alpha = alpha
Exemplo n.º 14
0
 def __init__(self,
              tree_memory_length,
              error_multiplier=0.01,
              alpha=0.6,
              beta=0.4,
              beta_increment_per_sample=0.001):
     """Store the settings and create the backing SumTree.

     Args:
         tree_memory_length: Passed to ``SumTree`` and kept on
             ``self.tree_memory_length``.
         error_multiplier: Stored unchanged on ``self.error_multiplier``.
         alpha: Stored on ``self.per_alpha``.
         beta: Stored on ``self.per_beta_init``.
         beta_increment_per_sample: Stored unchanged on
             ``self.beta_increment_per_sample``.
     """
     # Storage.
     self.tree_memory_length = tree_memory_length
     self.tree = SumTree(tree_memory_length)

     # Settings kept under their "per_" names.
     self.per_alpha = alpha
     self.per_beta_init = beta

     self.error_multiplier = error_multiplier
     self.beta_increment_per_sample = beta_increment_per_sample
Exemplo n.º 15
0
 def __init__(self,
              capacity,
              alpha=0.6,
              beta=0.4,
              beta_anneal_step=0.001,
              epsilon=0.00000001):
     """Store the settings and create the backing SumTree.

     Args:
         capacity: Passed to ``SumTree`` and kept on ``self.capacity``.
         alpha: Stored on ``self.a``.
         beta: Stored unchanged on ``self.beta``.
         beta_anneal_step: Stored on ``self.beta_increment_per_sampling``.
         epsilon: Stored on ``self.e``.
     """
     # Storage.
     self.capacity = capacity
     self.tree = SumTree(capacity)

     # Short attribute names for alpha and epsilon.
     self.a = alpha
     self.e = epsilon

     self.beta = beta
     self.beta_increment_per_sampling = beta_anneal_step
Exemplo n.º 16
0
 def load(self, lst_serializable):
     """
     Restore the Replay Buffer from its picklable representation.

     ``lst_serializable[0]`` is forwarded to the parent ``load``.
     ``lst_serializable[1]`` holds, in order: max priority, min priority,
     alpha, capacity, and the values handed to ``SumTree.update_values``.
     """
     super().load(lst_serializable[0])

     own_state = lst_serializable[1]
     self.max_priority = own_state[0]
     self.min_priority = own_state[1]
     self.alpha = own_state[2]

     # Rebuild the tree: a fresh SumTree of the saved capacity, then one
     # bulk update over indices 0..capacity-1.
     capacity = own_state[3]
     indices = range(capacity)
     self.sum_tree = SumTree(capacity)
     self.sum_tree.update_values(indices, own_state[4])
Exemplo n.º 17
0
 def __init__(self, host_name, db_name, collection_name):
     """Connect to MongoDB and fill the sum tree from stored priorities.

     Args:
         host_name: Host passed to ``MongoClient`` (port 27017 is fixed).
         db_name: Name of the database looked up on the client.
         collection_name: Name of the collection looked up on the database.
     """
     self.host_name = host_name
     self.db_name = db_name
     self.collection_name = collection_name

     # Client -> database -> collection.
     self.client = MongoClient(host_name, 27017)
     self.db = self.client[db_name]
     self.replay_memory_collection = self.db[collection_name]

     # NOTE(review): self.capacity is not assigned in this method --
     # presumably a class attribute; confirm.
     self.sum_tree = SumTree(self.capacity)

     # Add every returned document's priority, keyed by its _id.
     priority_only = {"priority": 1}
     for document in self.replay_memory_collection.find({}, priority_only):
         self.sum_tree.add(document["priority"], {"_id": document["_id"]})
Exemplo n.º 18
0
    def test_len(self):
        """Check ``len()`` of a ``SumTree(4)`` after 1, 2 and 5 adds."""
        sum_tree = SumTree(4)

        sum_tree.add(p=1, data=1)
        self.assertEqual(len(sum_tree), 1)

        sum_tree.add(p=2, data=2)
        self.assertEqual(len(sum_tree), 2)

        # Three more adds (five in total); the expected length is 4.
        for value in (3, 4, 5):
            sum_tree.add(p=value, data=value)

        self.assertEqual(len(sum_tree), 4)
Exemplo n.º 19
0
    def __init__(self, params):
        """Build the buffer from a settings mapping.

        Args:
            params: Mapping that must provide the keys ``'buffer_size'``,
                ``'batch_size'`` and ``'mode'``.
        """
        # Read every required key up front, in this order.
        buffer_size, batch_size, mode = (
            params['buffer_size'], params['batch_size'], params['mode'])

        experience_fields = ["state", "action", "reward", "next_state", "done"]

        self.__buffer_size = buffer_size
        self.__batch_size = batch_size
        self.__mode = mode

        self.__experience = namedtuple("Experience",
                                       field_names=experience_fields)
        self.__memory = SumTree(buffer_size)
        self.__memory_buffer = []
Exemplo n.º 20
0
    def __init__(self, action_size, buffer_size, batch_size, seed):
        """Initialize a ReplayBuffer object.

        Params
        ======
            action_size (int): dimension of each action
            buffer_size (int): maximum size of buffer
            batch_size (int): size of each training batch
            seed (int): random seed
        """
        self.action_size = action_size
        self.memory = SumTree(buffer_size)
        self.batch_size = batch_size
        self.experience = namedtuple("Experience", field_names=["state", "action", "reward", "next_state", "done"])
        # random.seed() returns None, so seed the global RNG first and keep
        # the seed value itself on the instance.
        random.seed(seed)
        self.seed = seed
Exemplo n.º 21
0
    def __init__(self, device, memory_size, update_every=4, seed=0):
        """Set up the storage and counters of the data structure.

        :param device:  (torch.device) Device on which tensors are allocated
        :param memory_size: (int) Maximum capacity of the memory buffer
        :param update_every: (int) Number of steps between update operations
        :param seed:  (int) Seed for the PRNG, forwarded to the SumTree
        """
        experience_fields = ["state", "action", "reward", "next_state", "done"]

        self.device = device

        # Two containers created with the same capacity.
        self.probability_weights = SumTree(capacity=memory_size, seed=seed)
        self.elements = deque(maxlen=memory_size)

        # Step counter and update interval.
        self.update_every = update_every
        self.step = 0

        self.experience = namedtuple("Experience", field_names=experience_fields)
Exemplo n.º 22
0
 def __init__(self,
              capacity,
              state_size=37,
              epsilon=0.001,
              alpha=0.4,
              beta=0.3,
              beta_increment_per_sampling=0.001,
              abs_err_upper=1):
     """Store the settings and create the backing SumTree.

     Args:
         capacity: Passed to the ``SumTree`` constructor.
         state_size: Stored unchanged on ``self.state_size``.
         epsilon: Small offset that avoids zero priorities.
         alpha: Trade-off between priority and randomness (0 is purely
             uniform, 1 is purely priority-based).
         beta: Importance-sampling weight exponent.
         beta_increment_per_sampling: Stored on
             ``self.beta_increment_per_sampling``.
         abs_err_upper: Upper clip for the absolute error.
     """
     self.tree = SumTree(capacity)
     self.epsilon = epsilon  # avoids zero priority, so every entry keeps a chance of being selected
     self.alpha = alpha  # priority vs randomness: 0 = pure uniform randomness, 1 = pure priority
     self.beta = beta  # importance-sampling weight, from small to big so that later corrections matter more
     # Use the arguments instead of repeating their defaults as literals,
     # so non-default values passed by the caller take effect.
     self.beta_increment_per_sampling = beta_increment_per_sampling
     self.abs_err_upper = abs_err_upper  # clipped abs error
     self.state_size = state_size
Exemplo n.º 23
0
    def __init__(
        self,
        buffer_size,
        batch_size,
        seed,
        beta_start=0.4,
        delta_beta=1e-5,
        alpha=0.6,
        eps=1e-8,
    ):
        """Initialize PER.

        Args:
            buffer_size (int): Requested size of the replay buffer. The parent
                buffer is created with ``2 ** (int(log2(buffer_size)) + 1)``
                slots, i.e. the first power of 2 greater than buffer_size.
            batch_size (int): Size of batches to draw.
            seed (float): Seed, forwarded to the parent initialiser.
            beta_start (float): Initial value for beta (importance sampling exponent).
            delta_beta (float): Beta increment at each time step.
            alpha (float): Priority exponent.
            eps (float): Small positive number so that examples with zero
                priority can still be sampled.
        """
        # The tree depth fixes the padded capacity handed to the parent.
        tree_depth = 1 + int(math.log2(buffer_size))
        padded_size = 2**tree_depth
        super(PrioritizeReplayBuffer, self).__init__(padded_size, batch_size,
                                                     seed)

        # Sum tree tracking the sum of priorities; it is built from the depth.
        self.priorities = SumTree(tree_depth)

        # Largest priority so far.
        self.max_p = 1.0

        # PER hyperparameters.
        self.alpha, self.eps = alpha, eps
        self.beta, self.delta_beta = beta_start, delta_beta
Exemplo n.º 24
0
    def __init__(self, action_size, agent_config):
        """Initialize a PriorityBuffer object.

        Params
        ======
            action_size (int): dimension of each action
            agent_config: settings object; the attributes read here are
                ``buffer_size``, ``batch_size``, ``buffer_epsilon``,
                ``alpha``, ``beta_start``, ``beta_end`` and
                ``beta_max_steps``
        """
        self.action_size = action_size
        self.tree = SumTree(capacity=agent_config.buffer_size)
        self.batch_size = agent_config.batch_size
        self.epsilon = agent_config.buffer_epsilon

        # Amount of prioritisation: a = 0 (pure random), a = 1 (only priority).
        self.alpha = agent_config.alpha

        # Beta starts at beta_start; the per-sample step covers the distance
        # to beta_end in beta_max_steps equal increments.
        initial_beta = agent_config.beta_start
        self.beta = initial_beta
        self.beta_start = initial_beta
        self.beta_end = agent_config.beta_end
        beta_span = self.beta_end - self.beta_start
        self.beta_increment_per_sampling = beta_span / agent_config.beta_max_steps
Exemplo n.º 25
0
 def __init__(self, capacity):
     """Remember ``capacity`` and create a ``SumTree`` built from it."""
     self.capacity = capacity
     self.tree = SumTree(capacity)
Exemplo n.º 26
0
 def __init__(self):
     """Create the sum tree from the module-level ``MEMORY_LEN``."""
     # MEMORY_LEN is only read here, so no ``global`` declaration is needed.
     self.tree = SumTree(MEMORY_LEN)
Exemplo n.º 27
0
    def create_tree(sample):
        """Return a SumTree holding each element of ``sample``.

        The tree is constructed with ``len(sample)``; every element is added
        as both the ``p`` and the ``data`` argument.
        """
        filled_tree = SumTree(len(sample))
        for value in sample:
            filled_tree.add(p=value, data=value)

        return filled_tree
Exemplo n.º 28
0
 def __init__(self, buffer_size, alpha):
     """Store the settings and create the backing SumTree.

     Args:
         buffer_size: Passed to ``SumTree`` and kept on ``self.capacity``.
         alpha: Stored unchanged on ``self.alpha``.
     """
     self.capacity, self.alpha = buffer_size, alpha
     self.max_priority = 1  # starting value
     self.tree = SumTree(buffer_size)
Exemplo n.º 29
0
 def __init__(self, config, capacity):
     """Keep ``config`` and ``capacity`` and create a ``SumTree(capacity)``."""
     self.config, self.capacity = config, capacity
     self.tree = SumTree(capacity)
Exemplo n.º 30
0
 def __init__(self, obs_dim, act_dim, size, act_dtype):
     """Initialise the ``SimpleMemory`` part, then add a sum tree and a lock.

     Args:
         obs_dim: Forwarded to ``SimpleMemory.__init__``.
         act_dim: Forwarded to ``SimpleMemory.__init__``.
         size: Forwarded to ``SimpleMemory.__init__`` and to ``SumTree``.
         act_dtype: Forwarded to ``SimpleMemory.__init__``.
     """
     SimpleMemory.__init__(self, obs_dim, act_dim, size, act_dtype)

     # Sum tree plus the lock object stored next to it.
     self.tree = SumTree(size)
     self.tree_lock = Lock()