Пример #1
0
class WeightBasedExpReplay(object):
    """Prioritized experience replay that samples proportionally to weight.

    Experiences live in a ``Buffer``; their weights are tracked three ways:
    in the ``weights`` dict (buffer index -> current weight), in a
    ``SumTree`` used for sampling, and in a ``Heap`` that is queried for the
    maximum priority.
    """

    def __init__(self, maxSize, alpha=0.6, epsilon=0.000001):
        """Create an empty replay memory.

        Args:
            maxSize: capacity handed to the buffer and the sum tree.
            alpha: exponent applied to ``weight + epsilon`` in
                ``modifyExperience``.
            epsilon: constant added to a weight before exponentiation.
        """
        self.maxSize = maxSize
        self.buffer = Buffer(self.maxSize)
        self.sumTree = SumTree(self.maxSize)
        self.weights = {}
        # Honour the caller-supplied exponent; it used to be pinned to 0.6
        # regardless of the argument.
        self.alpha = alpha
        self.curSize = 0
        self.epsilon = epsilon
        self.heap = Heap()

    def _setWeight(self, weight, index):
        """Record ``weight`` for ``index`` in the dict, sum tree and heap."""
        previous = self.weights.get(index, 0)
        self.weights[index] = weight
        # The sum tree is given the change in weight, not the new weight.
        self.sumTree.insert(weight - previous, index)
        self.heap.add(index, weight)

    def addExperience(self, experience):
        """Store ``experience`` with the heap's current maximum priority.

        The slot is the buffer's pointer as read before the insert; any
        weight previously recorded for that slot is replaced.
        """
        weight = self.heap.getMaxPriority()
        index = self.buffer.getPointer()
        self.buffer.insert(experience)
        self._setWeight(weight, index)
        self.curSize = min(self.curSize + 1, self.maxSize)

    def modifyExperience(self, weight, index):
        """Set the weight of slot ``index`` to ``(weight + epsilon) ** alpha``."""
        self._setWeight((weight + self.epsilon) ** self.alpha, index)

    def sample(self, samplesAmount):
        """Draw ``samplesAmount`` experiences, one per equal-width segment.

        The range ``[0, total weight]`` is split into ``samplesAmount``
        equal segments; one uniform value is drawn in each and handed to the
        sum tree's ``search`` to obtain an index.

        Returns:
            Three numpy arrays: the experiences, each sampled weight divided
            by the total weight, and the sampled indices.
        """
        # The total does not change while sampling, so read it once.
        total = self.sumTree.getAllSum()
        bounds = np.linspace(0, total, samplesAmount + 1).tolist()
        expList = []
        weightList = []
        indexList = []
        for low, high in zip(bounds[:-1], bounds[1:]):
            retrIndex = self.sumTree.search(np.random.uniform(low, high))
            expList.append(self.buffer.getItem(retrIndex))
            weightList.append(self.weights[retrIndex] / total)
            indexList.append(retrIndex)

        return (np.asarray(expList), np.asarray(weightList),
                np.asarray(indexList))

    def getMaxPriority(self):
        """Return ``heap.p2w[1]``, or the largest float for an empty heap."""
        if self.heap.size == 0:
            return sys.float_info.max
        # presumably position 1 holds the top of the heap -- confirm in Heap
        return self.heap.p2w[1]
Пример #2
0
class ExperienceReplay(object):
    """Uniform replay memory: a thin wrapper around a ``Buffer``."""

    def __init__(self, maxCapacity):
        """Allocate the backing buffer with room for ``maxCapacity`` items."""
        self.maxCapacity = maxCapacity
        self.buffer = Buffer(maxCapacity)

    def pushItem(self, item):
        """Forward ``item`` to the underlying buffer."""
        self.buffer.pushItem(item)

    def sample(self, batchSize):
        """Return ``batchSize`` stored items chosen without replacement."""
        candidates = range(0, self.getLength())
        chosen = sample(candidates, batchSize)
        fetch = self.buffer.getItem
        return [fetch(position) for position in chosen]

    def getLength(self):
        """Return the length reported by the underlying buffer."""
        return self.buffer.getLength()
class ExperienceReplay(object):
    """Uniform replay memory that stores experiences as ``Transition`` records."""

    def __init__(self, maxSize):
        """Create an empty memory backed by a ``Buffer`` of ``maxSize``."""
        self.maxSize = maxSize
        self.curSize = 0
        self.buffer = Buffer(maxSize)

    def addExperience(self, *experience):
        """Wrap the positional fields in a ``Transition`` and store it."""
        record = Transition(*experience)
        self.buffer.insert(record)
        # The fill level never exceeds the configured capacity.
        grown = self.curSize + 1
        self.curSize = grown if grown < self.maxSize else self.maxSize

    def sample(self, samplesAmount):
        """Return a list of ``samplesAmount`` distinct stored experiences.

        Slots are drawn without replacement from ``range(curSize)``.
        """
        slots = np.random.choice(self.curSize, samplesAmount, replace=False)
        return [self.buffer.getItem(slot) for slot in slots.tolist()]
class ExperienceReplay(object):
    """Uniform replay memory exposing the same sample shape as the weighted ones.

    ``sample`` returns an (experiences, weights, indices) triple where every
    weight is ``1 / samplesAmount`` and the indices entry is ``None``.
    """

    def __init__(self, maxSize, alpha=0.6):
        """Create an empty memory; ``alpha`` is accepted but not used."""
        self.maxSize = maxSize
        self.curSize = 0
        self.buffer = Buffer(maxSize)

    def addExperience(self, experience):
        """Store ``experience`` and bump the fill level up to ``maxSize``."""
        self.buffer.insert(experience)
        grown = self.curSize + 1
        self.curSize = grown if grown < self.maxSize else self.maxSize

    def sample(self, samplesAmount):
        """Draw ``samplesAmount`` distinct experiences uniformly.

        Returns:
            A numpy array of experiences, a list with one
            ``1.0 / samplesAmount`` entry per experience, and ``None``.
        """
        slots = np.random.choice(
            self.curSize, samplesAmount, replace=False).tolist()
        expList = [self.buffer.getItem(slot) for slot in slots]
        # Evaluated per drawn slot so an empty draw performs no division.
        weightList = [1.0 / samplesAmount for _ in slots]
        return np.asarray(expList), weightList, None
Пример #5
0
class RankBasedExpReplay(object):
    """Prioritized experience replay that samples by rank.

    Experiences live in a ``Buffer`` and their priorities in a ``Heap``.
    Rank ``r`` (0-based) gets weight ``(1 / (r + 1)) ** alpha``; the ranks
    are cut into segments of roughly equal total weight and one rank is
    drawn uniformly from each segment.
    """

    def __init__(self, maxSize, alpha=0.6):
        """Create an empty replay memory.

        Args:
            maxSize: capacity handed to the buffer.
            alpha: exponent applied to the inverse rank.
        """
        self.maxSize = maxSize
        self.buffer = Buffer(self.maxSize)
        self.heap = Heap()
        self.weights = None

        # Inputs the cached boundaries were last computed for; a mismatch
        # with the current values means the cache is stale.
        self.prevAlpha = alpha
        self.prevSize = 0
        self.prevSamplesAmount = None

        # Current alpha and replay size.
        self.alpha = alpha
        self.curSize = 0

        # Last rank of every sampling segment.
        self.endPoints = []

    def addExperience(self, experience):
        """Store ``experience`` with the heap's current maximum priority."""
        index = self.buffer.getPointer()
        self.buffer.insert(experience)
        weight = self.heap.getMaxPriority()
        self.heap.add(index, weight)
        self.curSize = self.heap.size

    def modifyExperience(self, weight, index):
        """Hand ``index`` and its new ``weight`` to the heap."""
        self.heap.add(index, weight)
        self.curSize = self.heap.size

    def sample(self, samplesAmount):
        """Draw one experience from each rank segment.

        The boundaries are recomputed whenever alpha, the replay size or the
        requested number of samples differs from the cached computation.

        Returns:
            Three numpy arrays: the experiences, each sampled rank's weight
            divided by the sum of all rank weights, and the sampled ranks.
            Fewer than ``samplesAmount`` entries are returned when
            ``computeBoundaries`` yields fewer segments.
        """
        stale = (
            self.weights is None
            or self.prevAlpha != self.alpha
            or self.prevSize != self.curSize
            # Boundaries depend on the batch size too; ignoring it reused
            # segments computed for a different samplesAmount.
            or self.prevSamplesAmount != samplesAmount
        )
        if stale:
            self.endPoints, self.weights = self.computeBoundaries(
                self.alpha, self.curSize, samplesAmount)
            self.prevAlpha = self.alpha
            self.prevSize = self.curSize
            self.prevSamplesAmount = samplesAmount

        totalWeights = sum(self.weights)
        startPoint = 0
        expList = []
        weightList = []
        indexList = []
        for lastRank in self.endPoints:
            end = lastRank + 1
            # Uniform pick among the ranks startPoint .. lastRank.
            retrIndex = startPoint + np.random.randint(
                end - startPoint, size=1)[0]
            startPoint = end
            expList.append(self.buffer.getItem(self.heap.getIndex(retrIndex)))
            weightList.append(self.weights[retrIndex] / totalWeights)
            # NOTE(review): this is the rank position, not the value returned
            # by heap.getIndex -- confirm that callers passing it to
            # modifyExperience expect a rank.
            indexList.append(retrIndex)
        return (np.asarray(expList), np.asarray(weightList),
                np.asarray(indexList))

    def computeBoundaries(self, alpha, curSize, samplesAmount):
        """Cut ``curSize`` ranks into segments of about equal total weight.

        Args:
            alpha: exponent applied to the inverse rank.
            curSize: number of ranks.
            samplesAmount: number of equal-weight stops to look for.

        Returns:
            ``(results, weights)`` where ``weights[r]`` is
            ``(1 / (r + 1)) ** alpha`` and ``results`` lists, in increasing
            order, the rank at which the running sum reaches each stop. A
            rank closes at most one segment, so ``results`` can be shorter
            than ``samplesAmount``.
        """
        weights = [(1.0 / (rank + 1)) ** alpha for rank in range(curSize)]
        stops = np.linspace(0, sum(weights), samplesAmount + 1).tolist()[1:]
        results = []
        curSum = 0
        curFounded = 0
        for rank, weight in enumerate(weights):
            # Every stop already matched (or none requested): indexing
            # ``stops`` any further would raise IndexError.
            if curFounded == len(stops):
                break
            curSum += weight
            if curSum >= stops[curFounded]:
                results.append(rank)
                curFounded += 1

        return results, weights

    def rebalance(self):
        """Empty the heap from the top down and re-insert every entry."""
        indexList = []
        weightList = []
        while self.heap.size != 0:
            maxIndex = self.heap.p2i[1]
            maxWeight = self.heap.p2w[1]
            indexList.append(maxIndex)
            weightList.append(maxWeight)
            self.heap.delete(maxIndex)
        # Re-insert through the heap; this class has no ``add`` of its own,
        # so the former ``self.add`` call raised AttributeError.
        for index, weight in zip(indexList, weightList):
            self.heap.add(index, weight)

    def getMaxPriority(self):
        """Return ``heap.p2w[1]``, or the largest float for an empty heap."""
        if self.heap.size == 0:
            return sys.float_info.max
        return self.heap.p2w[1]