def __init__(self, batch_size, max_size, beta):
    """Store the sampling parameters and build the backing sum tree.

    Args:
        batch_size: Mini-batch size.
        max_size: Requested capacity. It is rounded down to a power of
            two so that the sum tree is a complete binary tree.
        beta: Stored unchanged as ``self.beta``.
    """
    self.batch_size = batch_size  # mini-batch size
    # Round down to a power of two so the sum tree is a complete binary tree.
    self.max_size = 2**math.floor(math.log2(max_size))
    self.beta = beta
    # Build the tree from the rounded capacity; using the raw ``max_size``
    # would bypass the rounding performed above.
    self._sum_tree = SumTree(self.max_size)
def __init__(self, buffer_size, seed):
    """Initialize a ReplayBuffer object.

    Params
    ======
        buffer_size: size passed to the SumTree constructor
        seed (int): random seed
    """
    self.memory = SumTree(buffer_size)
    self.experience = namedtuple(
        "Experience",
        field_names=["state", "action", "reward", "next_state", "done"])
    # random.seed() returns None, so seed the generator first and keep the
    # seed value itself rather than the call's (always None) result.
    random.seed(seed)
    self.seed = seed

    # epsilon: small amount to avoid zero priority
    # alpha: [0~1] determines how much prioritization is used. With 0, we
    #   would get the uniform case.
    # beta: controls importance-sampling compensation; it fully compensates
    #   for the non-uniform probabilities when beta=1. The unbiased nature
    #   of the updates is most important near convergence at the end of
    #   training, so we define a schedule on the exponent beta that starts
    #   from an initial value and reaches 1 only at the end of learning.
    self.epsilon = 0.01
    self.alpha = 0.6
    beta_start = 0.4
    self.beta_end = 1.0
    self.beta = beta_start
    beta_increments = 200
    # Size of one step when moving beta from beta_start to beta_end in
    # ``beta_increments`` equal steps.
    self.beta_increment = (self.beta_end - beta_start) / beta_increments
def __init__(self, buffer_size, alpha, beta_zero, beta_increment_size=0.001,
             epsilon=0.1, max_priority=1., seed=None):
    """Set up an empty prioritised replay buffer.

    Args:
        buffer_size (int): capacity of the replay buffer.
        alpha (float): priority scaling hyperparameter.
        beta_zero (float): initial importance sampling scaling
            hyperparameter.
        beta_increment_size (float): beta annealing rate.
        epsilon (float): base priority to ensure non-zero sampling
            probability.
        max_priority (float): initial maximum priority (before scaling
            by ``alpha``).
        seed (int): seed for the random number generator.
    """
    random.seed(seed)

    # Storage: priorities live in the sum tree, experiences in a dict.
    self.sum_tree = SumTree(buffer_size)
    self.memory = {}
    self.experience = namedtuple(
        "experience",
        ["state", "action", "reward", "next_state", "done"])
    self.buffer_size = buffer_size

    # Hyperparameters.
    self.alpha = alpha
    self.beta = beta_zero
    self.beta_increment_size = beta_increment_size
    self.epsilon = epsilon

    # Both running bounds start at the alpha-scaled initial priority.
    scaled_priority = max_priority**alpha
    self.max_priority = scaled_priority
    self.min_priority = scaled_priority
    self.last_min_update = 0
def test_add(self):
    """Check the tree array after each of two consecutive ``add`` calls."""
    tree = SumTree(4)
    # (value added as both priority and data, expected tree array afterwards)
    steps = (
        (1, [1, 1, 0, 1, 0, 0, 0]),
        (2, [3, 3, 0, 1, 2, 0, 0]),
    )
    for value, expected in steps:
        tree.add(p=value, data=value)
        np.testing.assert_array_equal(expected, tree.tree)
def __init__(self, max_size, alpha, eps):
    """Create an empty buffer backed by a sum tree.

    Args:
        max_size: Size passed to the SumTree constructor; also kept as
            ``self.max_size``.
        alpha: Stored unchanged as ``self.alpha``.
        eps: Stored unchanged as ``self.eps``.
    """
    # Bookkeeping starts empty: nothing stored, no indices recorded yet.
    self.size = 0
    self.last_idxs = None

    self.eps = eps
    self.alpha = alpha
    self.max_size = max_size
    self.tree = SumTree(max_size)
def __init__(self, capacity, batch_size):
    """Create the memory with a sum tree of the given capacity.

    Args:
        capacity: Capacity passed to the SumTree constructor.
        batch_size: Batch size kept as ``self.batch_size``. Previously
            this argument was silently overwritten with a hard-coded 50.
    """
    self.capacity = capacity
    self.batch_size = batch_size
    self.tree = SumTree(capacity=capacity)
    self.alpha = 0.6
    self.beta = 0.4
    self.p_epsilon = 1e-4
def __init__(self, memory_size):
    """Create the memory and set its fixed hyperparameters.

    Args:
        memory_size: Size passed to the SumTree constructor; also kept
            as ``self.memory_size``.
    """
    self.memory = SumTree(memory_size)
    self.memory_size = memory_size

    # Priority shaping.
    self.alpha = 0.6  # adj_pri = pri^alpha
    self.epsilon = 0.0001  # small amount to avoid zero priority
    self.abs_err_upper = 1.  # clipped td error

    # Importance sampling: beta grows from its initial value towards 1.
    self.beta = 0.4
    self.beta_max = 1
    self.beta_increment_per_sampling = 0.001
def __init__(self, action_size, buffer_size, batch_size, alpha, epsilon):
    """Create the buffer, its sum tree and the experience record type.

    Args:
        action_size: Stored unchanged as ``self.action_size``.
        buffer_size: Size passed to the SumTree constructor.
        batch_size: Stored unchanged as ``self.batch_size``.
        alpha: Stored unchanged as ``self.alpha``.
        epsilon: Stored unchanged as ``self.epsilon``.
    """
    record_fields = ["state", "action", "reward", "next_state", "done"]
    self.experience = namedtuple("Experience", field_names=record_fields)
    self.tree = SumTree(buffer_size)

    self.action_size = action_size
    self.batch_size = batch_size
    self.epsilon = epsilon
    self.alpha = alpha
def __init__(self, capacity):
    """
    Instantiate a priority-based memory capable of holding `capacity`
    experiences. Memories are sampled with a frequency based on their
    priority.
    """
    self.a = 0.6  # Constant to control the weight of error on priority
    self.e = 0.01  # Small constant to ensure all priorities > 0
    # Circular-buffer, array-based tree with priorities as node values.
    self.tree = SumTree(capacity)
def __init__(self, e, a, beta, beta_increment_per_sampling, capacity,
             max_priority):
    """Store the replay hyperparameters and build the sum tree.

    Args:
        e: Stored unchanged as ``self.e``.
        a: Stored unchanged as ``self.a``.
        beta: Stored unchanged as ``self.beta``.
        beta_increment_per_sampling: Stored unchanged under the same name.
        capacity: Kept as ``self.capacity`` and passed to the SumTree
            constructor.
        max_priority: Stored unchanged as ``self.max_priority``.
    """
    # ``capacity`` used to be assigned twice; one assignment is sufficient.
    self.capacity = capacity
    self.e = e
    self.a = a
    self.beta = beta
    self.beta_increment_per_sampling = beta_increment_per_sampling
    self.max_priority = max_priority
    self.sum_tree = SumTree(self.capacity)
def __init__(self):
    """Create the memory from the module-level MEMORY_* constants."""
    self.Transition = namedtuple(
        'Transition',
        ('state', 'action', 'reward', 'next_state', 'terminal'))

    self.limit = MEMORY_CAPACITY
    self.err_tree = SumTree(MEMORY_CAPACITY)
    self.store_times = 0

    # Shape tuples all have a leading dimension of 0; the terminal shape
    # is the very same tuple object as the action shape.
    self.action_shape = self.terminal_shape = (0, MEMORY_ACTION_CNT)
    self.reward_shape = (0, MEMORY_REWARD_CNT)
    self.observation_shape = (0, MEMORY_CRITIC_FEATURE_NUM)
def __init__(self, alpha, beta, beta_end, epsilon, num_steps, replay_size):
    """Store the replay hyperparameters and build the sum tree.

    Args:
        alpha: Stored unchanged as ``self.alpha``.
        beta: Initial beta; kept both as ``self.beta_start`` and as the
            current ``self.beta``.
        beta_end: Stored unchanged as ``self.beta_end``.
        epsilon: Stored unchanged as ``self.epsilon``.
        num_steps: Stored unchanged as ``self.num_steps``.
        replay_size: Size passed to the SumTree constructor; also kept as
            ``self.replay_size``.
    """
    self.replay_size = replay_size
    self.memory = SumTree(replay_size)

    self.alpha = alpha
    self.epsilon = epsilon
    self.num_steps = num_steps

    # The current beta starts out equal to the configured start value.
    self.beta_start = self.beta = beta
    self.beta_end = beta_end
def __init__(self, observation_len: int, action_len: int, reward_len: int,
             capacity: int, alpha: float = 0.6):
    """Initialise the base buffer and the priority bookkeeping.

    Args:
        observation_len: Forwarded to the parent initialiser.
        action_len: Forwarded to the parent initialiser.
        reward_len: Forwarded to the parent initialiser.
        capacity: Forwarded to the parent initialiser and passed to the
            SumTree constructor.
        alpha: Stored as ``self.alpha``. Annotated ``float`` because the
            default (0.6) is not an integer.
    """
    super().__init__(observation_len, action_len, reward_len, capacity)
    self.sum_tree = SumTree(capacity)
    # NOTE(review): both priority bounds start at ``alpha`` itself; kept
    # as in the original, but confirm this is the intended initial value.
    self.max_priority = alpha
    self.min_priority = alpha
    self.alpha = alpha
def __init__(self, tree_memory_length, error_multiplier=0.01, alpha=0.6,
             beta=0.4, beta_increment_per_sample=0.001):
    """Build the sum tree and record the replay hyperparameters.

    Args:
        tree_memory_length: Size passed to the SumTree constructor; also
            kept as ``self.tree_memory_length``.
        error_multiplier: Stored unchanged as ``self.error_multiplier``.
        alpha: Stored as ``self.per_alpha``.
        beta: Stored as ``self.per_beta_init``.
        beta_increment_per_sample: Stored unchanged under the same name.
    """
    self.tree_memory_length = tree_memory_length
    self.tree = SumTree(tree_memory_length)

    self.per_alpha = alpha
    self.per_beta_init = beta
    self.beta_increment_per_sample = beta_increment_per_sample
    self.error_multiplier = error_multiplier
def __init__(self, capacity, alpha=0.6, beta=0.4, beta_anneal_step=0.001,
             epsilon=0.00000001):
    """Build the sum tree and record the replay hyperparameters.

    Args:
        capacity: Size passed to the SumTree constructor; also kept as
            ``self.capacity``.
        alpha: Stored as ``self.a``.
        beta: Stored as ``self.beta``.
        beta_anneal_step: Stored as ``self.beta_increment_per_sampling``.
        epsilon: Stored as ``self.e``.
    """
    self.capacity = capacity
    self.tree = SumTree(capacity)

    # Short attribute names: ``a`` holds alpha and ``e`` holds epsilon.
    self.e = epsilon
    self.a = alpha
    self.beta_increment_per_sampling = beta_anneal_step
    self.beta = beta
def load(self, lst_serializable):
    """
    Restore the Replay Buffer from its picklable representation.
    Inverse function of serializable.

    Element 0 is handed to the parent's ``load``; element 1 holds, in
    order: max priority, min priority, alpha, capacity and the values
    written back into a freshly built sum tree.
    """
    super().load(lst_serializable[0])

    priority_state = lst_serializable[1]
    self.max_priority = priority_state[0]
    self.min_priority = priority_state[1]
    self.alpha = priority_state[2]

    capacity = priority_state[3]
    indices = range(capacity)
    # Rebuild the tree from scratch, then write the saved values back.
    self.sum_tree = SumTree(capacity)
    self.sum_tree.update_values(indices, priority_state[4])
def __init__(self, host_name, db_name, collection_name):
    """Open the replay-memory collection and load its priorities.

    Connects with MongoClient on port 27017, selects the database and
    collection, then adds every document returned by the priority query
    to a new sum tree.

    Args:
        host_name: Host passed to MongoClient.
        db_name: Name of the database to select on the client.
        collection_name: Name of the collection to select in the database.
    """
    self.host_name = host_name
    self.db_name = db_name
    self.collection_name = collection_name

    self.client = MongoClient(host_name, 27017)
    self.db = self.client[db_name]
    self.replay_memory_collection = self.db[collection_name]

    # NOTE(review): self.capacity is not assigned anywhere in this method;
    # presumably it is provided at class level - confirm.
    self.sum_tree = SumTree(self.capacity)

    # Query all documents with a projection on "priority"; each one is
    # stored in the tree under its priority with its _id as the payload.
    stored = self.replay_memory_collection.find({}, {"priority": 1})
    for document in stored:
        self.sum_tree.add(document["priority"], {"_id": document["_id"]})
def test_len(self):
    """Check ``len`` after one, two and five additions to a SumTree(4)."""
    tree = SumTree(4)

    tree.add(p=1, data=1)
    self.assertEqual(len(tree), 1)

    tree.add(p=2, data=2)
    self.assertEqual(len(tree), 2)

    # Three more additions (five in total): the length is expected to be 4.
    for value in (3, 4, 5):
        tree.add(p=value, data=value)
    self.assertEqual(len(tree), 4)
def __init__(self, params):
    """Build the buffer from a parameter mapping.

    Args:
        params: Mapping that must provide the keys ``'buffer_size'``,
            ``'batch_size'`` and ``'mode'``.
    """
    # Read every required key up front so a missing one raises before any
    # attribute has been set.
    buffer_size, batch_size, mode = (
        params[key] for key in ('buffer_size', 'batch_size', 'mode'))

    self.__buffer_size = buffer_size
    self.__batch_size = batch_size
    self.__mode = mode

    self.__memory_buffer = []
    self.__memory = SumTree(buffer_size)
    self.__experience = namedtuple(
        "Experience",
        field_names=["state", "action", "reward", "next_state", "done"])
def __init__(self, action_size, buffer_size, batch_size, seed):
    """Initialize a ReplayBuffer object.

    Params
    ======
        action_size (int): dimension of each action
        buffer_size (int): maximum size of buffer
        batch_size (int): size of each training batch
        seed (int): random seed
    """
    self.action_size = action_size
    self.memory = SumTree(buffer_size)
    self.batch_size = batch_size
    self.experience = namedtuple(
        "Experience",
        field_names=["state", "action", "reward", "next_state", "done"])
    # random.seed() returns None, so seed the generator first and keep the
    # seed value itself rather than the call's (always None) result.
    random.seed(seed)
    self.seed = seed
def __init__(self, device, memory_size, update_every=4, seed=0):
    """
    Builds the empty memory buffer together with its probability tree
    :param device: (torch.device) Object representing the device where to allocate tensors
    :param memory_size: (int) Maximum capacity of memory buffer
    :param update_every: (int) Number of steps between update operations
    :param seed: (int) Seed used for PRNG
    """
    self.experience = namedtuple(
        "Experience",
        field_names=["state", "action", "reward", "next_state", "done"])

    # Elements live in a bounded deque; their weights live in the sum tree.
    self.elements = deque(maxlen=memory_size)
    self.probability_weights = SumTree(capacity=memory_size, seed=seed)

    self.device = device
    self.update_every = update_every
    self.step = 0
def __init__(self, capacity, state_size=37, epsilon=0.001, alpha=0.4,
             beta=0.3, beta_increment_per_sampling=0.001, abs_err_upper=1):
    """Build the sum tree and record the replay hyperparameters.

    Args:
        capacity: Size passed to the SumTree constructor.
        state_size: Stored unchanged as ``self.state_size``.
        epsilon: Avoids a priority of 0, which would leave an entry no
            chance of being selected stochastically.
        alpha: Trades priority against randomness. alpha = 0 is purely
            uniform randomness, alpha = 1 is pure priority.
        beta: Importance-sampling weight, grown from small to large to
            give more importance to corrections made towards the end of
            training.
        beta_increment_per_sampling: Stored unchanged under the same
            name. Previously ignored in favour of a hard-coded 0.001.
        abs_err_upper: Upper clip for the absolute error. Previously
            ignored in favour of a hard-coded 1.
    """
    self.tree = SumTree(capacity)
    self.epsilon = epsilon
    self.alpha = alpha
    self.beta = beta
    # Honour the caller's values; the defaults match the constants that
    # used to be hard-coded here.
    self.beta_increment_per_sampling = beta_increment_per_sampling
    self.abs_err_upper = abs_err_upper  # clipped abs error
    self.state_size = state_size
def __init__(
    self,
    buffer_size,
    batch_size,
    seed,
    beta_start=0.4,
    delta_beta=1e-5,
    alpha=0.6,
    eps=1e-8,
):
    """Initialize PER.

    Args:
        buffer_size (int): Size of replay buffer. The actual size will be
            the first power of 2 greater than buffer_size.
        batch_size (int): Size of batches to draw.
        seed (float): Seed.
        beta_start (float): Initial value for beta (importance sampling
            exponent).
        delta_beta (float): Beta increment at each time step.
        alpha (float): Priority exponent.
        eps (float): Small positive number so that examples with zero
            priority can still be sampled.
    """
    # The sum tree is sized by its depth; the parent buffer gets the
    # matching power-of-two capacity.
    tree_depth = int(math.log2(buffer_size)) + 1
    padded_size = 2**tree_depth
    super().__init__(padded_size, batch_size, seed)

    # Sum tree keeping track of the sum of priorities.
    self.priorities = SumTree(tree_depth)

    # PER parameters.
    self.alpha = alpha
    self.eps = eps
    self.beta = beta_start
    self.delta_beta = delta_beta

    # Current max priority.
    self.max_p = 1.0
def __init__(self, action_size, agent_config):
    """Initialize a PriorityBuffer object.

    Params
    ======
        action_size (int): dimension of each action
        agent_config: configuration object; the attributes read here are
            buffer_size, batch_size, buffer_epsilon, alpha, beta_start,
            beta_end and beta_max_steps
    """
    # NOTE: no random seeding is performed in this initialiser.
    self.action_size = action_size
    self.batch_size = agent_config.batch_size
    self.tree = SumTree(capacity=agent_config.buffer_size)

    self.epsilon = agent_config.buffer_epsilon
    # how much randomness we require: a = 0 (pure random), a = 1 (only priority)
    self.alpha = agent_config.alpha

    # Beta starts at beta_start; the per-sample increment spreads the
    # distance to beta_end evenly over beta_max_steps.
    beta_start = agent_config.beta_start
    self.beta = beta_start
    self.beta_start = beta_start
    self.beta_end = agent_config.beta_end
    beta_range = self.beta_end - self.beta_start
    self.beta_increment_per_sampling = beta_range / agent_config.beta_max_steps
def __init__(self, capacity):
    """Remember ``capacity`` and build a sum tree from it.

    Args:
        capacity: Size passed to the SumTree constructor.
    """
    self.capacity = capacity
    self.tree = SumTree(capacity)
def __init__(self):
    """Build the sum tree from the module-level MEMORY_LEN."""
    # MEMORY_LEN is only read here, so no ``global`` declaration is needed.
    self.tree = SumTree(MEMORY_LEN)
def create_tree(sample):
    """Return a SumTree sized to ``sample`` and filled with its elements.

    Each element is added in iteration order, serving as both the
    priority (``p``) and the stored ``data``.
    """
    filled = SumTree(len(sample))
    for value in sample:
        filled.add(p=value, data=value)
    return filled
def __init__(self, buffer_size, alpha):
    """Build the sum tree and set the initial priority state.

    Args:
        buffer_size: Size passed to the SumTree constructor; also kept as
            ``self.capacity``.
        alpha: Stored unchanged as ``self.alpha``.
    """
    self.alpha = alpha
    self.max_priority = 1  # initial maximum priority

    self.tree = SumTree(buffer_size)
    self.capacity = buffer_size
def __init__(self, config, capacity):
    """Keep the configuration and build a sum tree of the given capacity.

    Args:
        config: Stored unchanged as ``self.config``.
        capacity: Size passed to the SumTree constructor; also kept as
            ``self.capacity``.
    """
    self.tree = SumTree(capacity)
    self.capacity = capacity
    self.config = config
def __init__(self, obs_dim, act_dim, size, act_dtype):
    """Initialise the SimpleMemory base, then add a sum tree and its lock.

    Args:
        obs_dim: Forwarded to ``SimpleMemory.__init__``.
        act_dim: Forwarded to ``SimpleMemory.__init__``.
        size: Forwarded to ``SimpleMemory.__init__`` and passed to the
            SumTree constructor.
        act_dtype: Forwarded to ``SimpleMemory.__init__``.
    """
    # Explicit base-class call, kept exactly as the original wrote it.
    SimpleMemory.__init__(self, obs_dim, act_dim, size, act_dtype)

    # presumably the lock guards access to the tree - verify against callers
    self.tree = SumTree(size)
    self.tree_lock = Lock()