Example #1
0
class WeightBasedExpReplay(object):
    """Proportional (weight-based) prioritized experience replay.

    Experiences are sampled with probability proportional to their
    priority weight.  A SumTree gives O(log n) weighted range search;
    a Heap tracks the current maximum priority.
    """

    def __init__(self, maxSize, alpha=0.6, epsilon=0.000001):
        """
        Args:
            maxSize: capacity of the circular experience buffer.
            alpha: priority exponent; shapes the sampling distribution
                between uniform (0) and fully greedy (1).
            epsilon: small constant added to every raw priority so that
                zero-error experiences remain sampleable.
        """
        self.maxSize = maxSize
        self.buffer = Buffer(self.maxSize)
        self.sumTree = SumTree(self.maxSize)
        self.weights = {}
        # BUG FIX: was hard-coded to 0.6, silently ignoring the caller's
        # `alpha` argument.
        self.alpha = alpha
        self.curSize = 0
        self.epsilon = epsilon
        self.heap = Heap()

    def _setWeight(self, index, weight):
        # Keep all three bookkeeping structures in sync; the SumTree
        # stores deltas, so insert only the change in weight.
        prevWeight = self.weights.get(index, 0)
        self.weights[index] = weight
        self.sumTree.insert(weight - prevWeight, index)
        self.heap.add(index, weight)

    def addExperience(self, experience):
        """Insert a new experience at max priority so it is sampled at
        least once before being re-prioritized."""
        weight = self.heap.getMaxPriority()
        index = self.buffer.getPointer()
        self.buffer.insert(experience)
        self._setWeight(index, weight)
        self.curSize = min(self.curSize + 1, self.maxSize)

    def modifyExperience(self, weight, index):
        """Re-prioritize the experience stored at `index` from its raw
        priority `weight` (typically a TD error)."""
        self._setWeight(index, (weight + self.epsilon) ** self.alpha)

    def sample(self, samplesAmount):
        """Stratified sampling: one draw per equal-weight segment.

        Returns (experiences, normalized weights, buffer indices) as
        numpy arrays; the indices are what `modifyExperience` expects.
        """
        # Hoisted: the total is loop-invariant within one call.
        totalSum = self.sumTree.getAllSum()
        startPoints = np.linspace(0, totalSum, samplesAmount + 1).tolist()
        expList = []
        weightList = []
        indexList = []
        for a in range(len(startPoints) - 1):
            sampledNum = np.random.uniform(startPoints[a], startPoints[a + 1])
            retrIndex = self.sumTree.search(sampledNum)
            expList.append(self.buffer.getItem(retrIndex))
            weightList.append(self.weights[retrIndex] / totalSum)
            indexList.append(retrIndex)

        return (np.asarray(expList), np.asarray(weightList),
                np.asarray(indexList))

    def getMaxPriority(self):
        """Largest stored priority; float max when empty so the first
        experience is guaranteed top priority."""
        if self.heap.size == 0:
            return sys.float_info.max
        return self.heap.p2w[1]
class ExperienceReplay(object):
    """Uniform (non-prioritized) experience replay buffer."""

    def __init__(self, maxSize):
        self.maxSize = maxSize
        self.buffer = Buffer(self.maxSize)
        self.curSize = 0

    def addExperience(self, *experience):
        """Wrap the given fields in a Transition and store it."""
        self.buffer.insert(Transition(*experience))
        self.curSize = min(self.curSize + 1, self.maxSize)

    def sample(self, samplesAmount):
        """Draw `samplesAmount` distinct experiences uniformly at random."""
        chosen = np.random.choice(self.curSize, samplesAmount,
                                  replace=False).tolist()
        return [self.buffer.getItem(idx) for idx in chosen]
Example #3
0
class Agent():
    """DQN agent with an evaluation network and a periodically synced
    target network.

    Relies on module-level names: Buffer, Network, Memory_size,
    Learning_rate, Batch_size, Gamma, Refresh_gap.
    """

    def __init__(self, Env_dim, Nb_action):
        self.memory = Buffer(Memory_size)
        self.eval_nn = Network(Env_dim, Nb_action)
        self.target_nn = Network(Env_dim, Nb_action)
        self.optimizer = torch.optim.Adam(self.eval_nn.parameters(),
                                          lr=Learning_rate)
        self.criterion = nn.MSELoss(reduction='sum')
        self.counter = 0
        # BUG FIX: previously `self.target_nn.fc1 = self.eval_nn.fc1`
        # etc. ALIASED the layer objects, so both networks shared the
        # same parameters and the target was never actually frozen
        # between syncs. Copy the weights instead.
        self._sync_target()

    def _sync_target(self):
        # Hard update: target <- eval (weight copy, not aliasing).
        self.target_nn.load_state_dict(self.eval_nn.state_dict())

    def choose_action(self, s):
        """Q-values of the eval network for a single state `s`."""
        s = torch.unsqueeze(torch.FloatTensor(s), 0)
        return self.eval_nn(s)[0].detach()  # ae(s)

    def getSample(self):
        return self.memory.sample(Batch_size)

    def optimize_model(self, file):
        """Run one optimization pass over a sampled batch; every
        Refresh_gap passes, checkpoint the eval network to `file` and
        sync the target network."""
        if self.memory.get_nb_elements() < Batch_size:
            return
        batch = self.memory.sample(Batch_size)
        for s, a, s_, r, done in batch:
            qValue = (self.eval_nn(torch.tensor(s).float()))[a]
            # Bootstrap target, detached so no gradient flows through
            # the target network.
            with torch.no_grad():
                qNext = torch.max(self.target_nn(torch.tensor(s_).float()))
            target = r + Gamma * qNext * (1 - done)
            # BUG FIX: the old code built JO = (q - y)**2 and then fed
            # it into MSELoss(q, JO), squaring the TD error twice.
            # Compare the prediction to the TD target directly.
            loss = self.criterion(qValue, target)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        self.counter += 1
        if self.counter % Refresh_gap == 0:
            torch.save(self.eval_nn, file)
            self._sync_target()

    def store_transition(self, value):
        self.memory.insert(value)
class ExperienceReplay(object):
    """Uniform experience replay whose `sample` signature matches the
    prioritized variants (returns weights and an index placeholder)."""

    def __init__(self, maxSize, alpha=0.6):
        # `alpha` is accepted for interface parity with the prioritized
        # replays; uniform sampling does not use it.
        self.maxSize = maxSize
        self.buffer = Buffer(self.maxSize)
        self.curSize = 0

    def addExperience(self, experience):
        """Store one experience, capping the tracked size at maxSize."""
        self.buffer.insert(experience)
        self.curSize = min(self.curSize + 1, self.maxSize)

    def sample(self, samplesAmount):
        """Uniform sample without replacement.

        Returns (experiences array, uniform importance weights, None).
        """
        picks = np.random.choice(self.curSize, samplesAmount,
                                 replace=False).tolist()
        experiences = [self.buffer.getItem(p) for p in picks]
        weights = [1.0 / samplesAmount] * len(picks)
        return np.asarray(experiences), weights, None
Example #5
0
class RankBasedExpReplay(object):
    """Rank-based prioritized experience replay.

    Experiences live in a max-heap ordered by priority; the sampling
    probability of an experience depends only on its rank, weighted by
    (1 / rank) ** alpha.
    """

    def __init__(self, maxSize, alpha=0.6):
        self.maxSize = maxSize
        self.buffer = Buffer(self.maxSize)
        self.heap = Heap()
        self.weights = None

        # Flags to detect when segment boundaries must be recomputed.
        self.prevAlpha = alpha
        self.prevSize = 0

        # Current alpha and experience-replay size.
        self.alpha = alpha
        self.curSize = 0

        # Per-segment end ranks, cached between sample() calls.
        self.endPoints = []

    def addExperience(self, experience):
        """Store a new experience at the current max priority so it is
        sampled at least once before being re-prioritized."""
        index = self.buffer.getPointer()
        self.buffer.insert(experience)
        weight = self.heap.getMaxPriority()
        self.heap.add(index, weight)
        self.curSize = self.heap.size

    def modifyExperience(self, weight, index):
        """Update the priority of the experience stored at `index`."""
        self.heap.add(index, weight)
        self.curSize = self.heap.size

    def sample(self, samplesAmount):
        """Stratified sampling: one experience per rank segment.

        Returns (experiences, normalized weights, rank indices) as
        numpy arrays; the rank indices are what callers pass back to
        `modifyExperience`.
        """
        # Recompute boundaries only when alpha or size changed.
        if (self.prevAlpha != self.alpha) or (self.prevSize != self.curSize):
            self.endPoints, self.weights = self.computeBoundaries(
                self.alpha, self.curSize, samplesAmount)
            self.prevAlpha = self.alpha
            self.prevSize = self.curSize
        totalWeights = sum(self.weights)
        startPoint = 0
        expList = []
        weightList = []
        indexList = []
        for endRank in self.endPoints:
            end = endRank + 1
            # One rank chosen uniformly inside [startPoint, end).
            sampledNum = np.random.randint(end - startPoint, size=1)[0]
            retrIndex = startPoint + sampledNum
            startPoint = end
            expList.append(self.buffer.getItem(self.heap.getIndex(retrIndex)))
            weightList.append(self.weights[retrIndex] / totalWeights)
            indexList.append(retrIndex)
        return (np.asarray(expList), np.asarray(weightList),
                np.asarray(indexList))

    def computeBoundaries(self, alpha, curSize, samplesAmount):
        """Split the rank distribution (1/rank)**alpha into
        `samplesAmount` segments of (approximately) equal total weight.

        Returns (segment end ranks, per-rank weights).
        """
        weights = [(1.0 / (rank + 1)) ** alpha for rank in range(curSize)]
        sumAllWeights = sum(weights)
        stops = np.linspace(0, sumAllWeights, samplesAmount + 1).tolist()
        del stops[0]  # drop the leading 0 boundary
        curSum = 0
        curFounded = 0
        curStop = -1
        results = []
        for w in weights:
            curSum += w
            curStop += 1
            # BUG FIX: bounds guard — float accumulation can cross the
            # final stop before the last weight, which previously made
            # stops[curFounded] raise IndexError.
            if curFounded < samplesAmount and curSum >= stops[curFounded]:
                results.append(curStop)
                curFounded += 1

        return results, weights

    def rebalance(self):
        """Rebuild the heap to restore exact ordering: pop every entry
        in priority order, then re-insert them all."""
        indexList = []
        weightList = []
        while self.heap.size != 0:
            maxIndex = self.heap.p2i[1]
            maxWeight = self.heap.p2w[1]
            indexList.append(maxIndex)
            weightList.append(maxWeight)
            self.heap.delete(maxIndex)
        # BUG FIX: previously called self.add(...), which does not exist
        # on this class (AttributeError); the heap's add was intended.
        for idx, w in zip(indexList, weightList):
            self.heap.add(idx, w)

    def getMaxPriority(self):
        """Largest stored priority; float max when empty so the first
        experience is guaranteed top priority."""
        if self.heap.size == 0:
            return sys.float_info.max
        return self.heap.p2w[1]