class WeightBasedExpReplay(object):
    """Replay memory that samples experiences in proportion to a weight.

    Every buffer slot has a weight stored in ``self.weights``.  The same
    weight is mirrored into a ``SumTree`` (as a delta against the slot's
    previous weight) and into a ``Heap``.  ``Buffer``, ``SumTree`` and
    ``Heap`` are defined outside this block.
    """

    def __init__(self, maxSize, alpha=0.6, epsilon=0.000001):
        """Create an empty replay memory.

        Args:
            maxSize: Capacity handed to the buffer and the sum tree.
            alpha: Exponent applied to weights in ``modifyExperience``.
            epsilon: Constant added to a weight before exponentiation.
        """
        self.maxSize = maxSize
        self.buffer = Buffer(self.maxSize)
        self.sumTree = SumTree(self.maxSize)
        self.weights = {}
        # Honour the caller's exponent; this used to be hard-coded to 0.6,
        # which silently ignored the ``alpha`` argument.
        self.alpha = alpha
        self.curSize = 0
        self.epsilon = epsilon
        self.heap = Heap()

    def _storeWeight(self, index, weight):
        """Record ``weight`` for slot ``index`` in the dict, tree and heap."""
        previous = self.weights.get(index, 0)
        self.weights[index] = weight
        # The tree is updated with the change in weight, not the new value.
        self.sumTree.insert(weight - previous, index)
        self.heap.add(index, weight)

    def addExperience(self, experience):
        """Insert ``experience`` using the heap's current maximum priority.

        The weight is taken as-is from ``self.heap.getMaxPriority()``;
        ``alpha`` and ``epsilon`` are not applied here.
        """
        weight = self.heap.getMaxPriority()
        # Read the write position before inserting: it is the slot that the
        # new experience is about to occupy.
        index = self.buffer.getPointer()
        self.buffer.insert(experience)
        self._storeWeight(index, weight)
        self.curSize = min(self.curSize + 1, self.maxSize)

    def modifyExperience(self, weight, index):
        """Set the weight of slot ``index`` to ``(weight + epsilon) ** alpha``."""
        self._storeWeight(index, (weight + self.epsilon) ** self.alpha)

    def sample(self, samplesAmount):
        """Draw ``samplesAmount`` experiences, one per equal-width stratum.

        The range ``[0, total weight]`` is cut into ``samplesAmount`` equal
        segments; a uniform value is drawn inside each segment and handed to
        ``self.sumTree.search`` to obtain a slot index.

        Returns:
            A tuple of three numpy arrays: the sampled experiences, each
            sampled slot's weight divided by the total weight, and the
            sampled slot indices.
        """
        # The total does not change while sampling, so query it only once.
        totalWeight = self.sumTree.getAllSum()
        bounds = np.linspace(0, totalWeight, samplesAmount + 1).tolist()
        expList = []
        weightList = []
        indexList = []
        for low, high in zip(bounds[:-1], bounds[1:]):
            drawn = np.random.uniform(low, high)
            slot = self.sumTree.search(drawn)
            expList.append(self.buffer.getItem(slot))
            weightList.append(self.weights[slot] / totalWeight)
            indexList.append(slot)
        return np.asarray(expList), np.asarray(weightList), np.asarray(indexList)

    def getMaxPriority(self):
        """Return the weight at heap position 1, or the largest float if empty.

        Note that ``addExperience`` does not call this method; it asks the
        heap object directly.
        """
        if self.heap.size == 0:
            return sys.float_info.max
        return self.heap.p2w[1]
class ExperienceReplay(object):
    """Replay memory that samples stored items without replacement.

    Storage is delegated to a ``Buffer`` built with ``maxCapacity``;
    ``Buffer`` is defined outside this block.
    """

    def __init__(self, maxCapacity):
        """Remember the capacity and create the backing buffer."""
        self.maxCapacity = maxCapacity
        self.buffer = Buffer(self.maxCapacity)

    def pushItem(self, item):
        """Forward ``item`` to the backing buffer."""
        self.buffer.pushItem(item)

    def sample(self, batchSize):
        """Return a list of ``batchSize`` items picked at distinct positions.

        Positions are drawn from ``range(getLength())`` with the
        module-level ``sample`` helper (presumably ``random.sample`` --
        confirm against the file's imports).
        """
        storedCount = self.getLength()
        chosenPositions = sample(range(storedCount), batchSize)
        batch = []
        for position in chosenPositions:
            batch.append(self.buffer.getItem(position))
        return batch

    def getLength(self):
        """Return the length reported by the backing buffer."""
        return self.buffer.getLength()
class ExperienceReplay(object):
    """Replay memory of ``Transition`` records with uniform sampling.

    ``Buffer`` and ``Transition`` are defined outside this block.
    """

    def __init__(self, maxSize):
        """Create the backing buffer and start with zero stored items."""
        self.maxSize = maxSize
        self.buffer = Buffer(self.maxSize)
        self.curSize = 0

    def addExperience(self, *experience):
        """Pack the positional fields into a ``Transition`` and store it."""
        transition = Transition(*experience)
        self.buffer.insert(transition)
        # The stored count saturates at the capacity.
        self.curSize = min(self.curSize + 1, self.maxSize)

    def sample(self, samplesAmount):
        """Return a list of ``samplesAmount`` items from distinct slots.

        Slots are drawn without replacement from ``range(self.curSize)``.
        """
        chosenSlots = np.random.choice(self.curSize, samplesAmount, replace=False)
        return [self.buffer.getItem(slot) for slot in chosenSlots.tolist()]
class ExperienceReplay(object):
    """Uniform replay memory whose ``sample`` returns a three-element tuple.

    ``Buffer`` is defined outside this block.
    """

    def __init__(self, maxSize, alpha=0.6):
        """Create the backing buffer; ``alpha`` is accepted but not used."""
        self.maxSize = maxSize
        self.buffer = Buffer(self.maxSize)
        self.curSize = 0

    def addExperience(self, experience):
        """Store ``experience`` and bump the stored count up to ``maxSize``."""
        self.buffer.insert(experience)
        self.curSize = min(self.curSize + 1, self.maxSize)

    def sample(self, samplesAmount):
        """Draw ``samplesAmount`` items from distinct slots.

        Returns:
            A tuple ``(experiences, weights, None)``: a numpy array of the
            sampled items, a plain list holding ``1.0 / samplesAmount`` once
            per sampled item, and ``None`` in the third position.
        """
        chosenSlots = np.random.choice(self.curSize, samplesAmount, replace=False).tolist()
        experiences = [self.buffer.getItem(slot) for slot in chosenSlots]
        # Divide per element so an empty draw never evaluates the division.
        uniformWeights = [1.0 / samplesAmount for _ in chosenSlots]
        return np.asarray(experiences), uniformWeights, None
class RankBasedExpReplay(object):
    """Replay memory that samples experiences according to priority rank.

    Rank ``r`` (0-based) gets the weight ``(1 / (r + 1)) ** alpha``.  The
    ranks are cut into consecutive segments of roughly equal total weight and
    one rank is drawn uniformly from every segment.  Ranks are translated to
    buffer slots through ``self.heap.getIndex``.  ``Buffer`` and ``Heap`` are
    defined outside this block.
    """

    def __init__(self, maxSize, alpha=0.6):
        """Create an empty replay memory.

        Args:
            maxSize: Capacity handed to the buffer.
            alpha: Exponent applied to the rank weights.
        """
        self.maxSize = maxSize
        self.buffer = Buffer(self.maxSize)
        self.heap = Heap()
        self.weights = None
        # Cache keys: the segment boundaries are recomputed only when alpha,
        # the stored size or the requested batch size differs from these.
        self.prevAlpha = alpha
        self.prevSize = 0
        self.prevSamplesAmount = None
        # Current alpha and number of stored experiences.
        self.alpha = alpha
        self.curSize = 0
        # Last rank (inclusive) of every sampling segment.
        self.endPoints = []

    def addExperience(self, experience):
        """Store ``experience`` with the heap's current maximum priority."""
        # Read the write position before inserting: it is the slot that the
        # new experience is about to occupy.
        index = self.buffer.getPointer()
        self.buffer.insert(experience)
        weight = self.heap.getMaxPriority()
        self.heap.add(index, weight)
        self.curSize = self.heap.size

    def modifyExperience(self, weight, index):
        """Hand ``(index, weight)`` to the heap and refresh the stored size."""
        self.heap.add(index, weight)
        self.curSize = self.heap.size

    def sample(self, samplesAmount):
        """Draw one experience from each rank segment.

        Returns:
            A tuple of three numpy arrays: the sampled experiences, each
            sampled rank's weight divided by the sum of all rank weights, and
            the sampled ranks.  Fewer than ``samplesAmount`` entries come
            back when ``computeBoundaries`` yields fewer segments.
        """
        # The boundaries depend on the batch size as well, so a different
        # ``samplesAmount`` must invalidate the cache; the ``None`` check
        # covers a first call on a memory that is still empty.
        stale = (
            self.weights is None
            or self.prevAlpha != self.alpha
            or self.prevSize != self.curSize
            or self.prevSamplesAmount != samplesAmount
        )
        if stale:
            self.endPoints, self.weights = self.computeBoundaries(
                self.alpha, self.curSize, samplesAmount)
            self.prevAlpha = self.alpha
            self.prevSize = self.curSize
            self.prevSamplesAmount = samplesAmount

        totalWeights = sum(self.weights)
        expList = []
        weightList = []
        indexList = []
        segmentStart = 0
        for lastRank in self.endPoints:
            segmentEnd = lastRank + 1
            rank = segmentStart + np.random.randint(segmentEnd - segmentStart)
            segmentStart = segmentEnd
            expList.append(self.buffer.getItem(self.heap.getIndex(rank)))
            weightList.append(self.weights[rank] / totalWeights)
            # NOTE(review): this is the rank, not the buffer slot returned by
            # heap.getIndex; modifyExperience passes its index straight to
            # heap.add -- confirm which one callers are expected to use.
            indexList.append(rank)
        return np.asarray(expList), np.asarray(weightList), np.asarray(indexList)

    def computeBoundaries(self, alpha, curSize, samplesAmount):
        """Split ranks ``0..curSize-1`` into segments of similar total weight.

        Returns:
            ``(endPoints, weights)`` where ``weights[r]`` is
            ``(1 / (r + 1)) ** alpha`` and ``endPoints`` lists the last rank
            of each segment.  At most one boundary is emitted per rank, so
            there can be fewer segments than ``samplesAmount``.
        """
        weights = [(1.0 / (rank + 1)) ** alpha for rank in range(curSize)]
        # Drop the leading 0 so that only the upper edge of each slice stays.
        stops = np.linspace(0, sum(weights), samplesAmount + 1).tolist()[1:]
        results = []
        curSum = 0
        for rank, weight in enumerate(weights):
            # Every boundary is placed (always the case for a batch size of
            # zero); stop instead of indexing past the end of ``stops``.
            if len(results) == len(stops):
                break
            curSum += weight
            if curSum >= stops[len(results)]:
                results.append(rank)
        return results, weights

    def rebalance(self):
        """Empty the heap from the top and re-insert every entry."""
        drained = []
        while self.heap.size != 0:
            topIndex = self.heap.p2i[1]
            topWeight = self.heap.p2w[1]
            drained.append((topIndex, topWeight))
            self.heap.delete(topIndex)
        for index, weight in drained:
            # Re-insert through the heap; this class has no ``add`` method of
            # its own, so the former ``self.add`` call raised AttributeError.
            self.heap.add(index, weight)

    def getMaxPriority(self):
        """Return the weight at heap position 1, or the largest float if empty.

        Note that ``addExperience`` does not call this method; it asks the
        heap object directly.
        """
        if self.heap.size == 0:
            return sys.float_info.max
        return self.heap.p2w[1]