from math import exp


def getPrototypeDistribution(i):
    # Theta-weighted membership grade of prototype i at every state.
    dist = []
    prototype = prototypes[i]
    for state in range(maxState):
        tempPrototype = Prototype(numActions, stateDimension)
        tempPrototype.setFixed([state / float(maxState)], 0)
        tempFeatureDiff = tempPrototype.calculateDiff(prototype)
        # Gaussian membership grade with the feature width as variance:
        # exp(-d^2 / (2 * width)).
        membershipGrade = exp(-(tempFeatureDiff * tempFeatureDiff)
                              / (2 * prototype.getFeatureWidth()))
        dist.append(membershipGrade * prototype.getTheta())
    return dist
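# The functions in this file rely on a Prototype class that is not shown.
# Below is a minimal, hypothetical sketch of the interface they assume; the
# field layout, the default feature width, and the distance rule for
# mismatched actions are assumptions, not the original implementation.
import random


class Prototype:
    def __init__(self, numActions, stateDimension, group=0):
        self.numActions = numActions
        self.stateDimension = stateDimension
        self.group = group
        self.state = [0.0] * stateDimension  # normalized state features
        self.action = 0
        self.theta = 0.0                     # learned weight
        self.featureWidth = 0.05             # assumed Gaussian variance

    def setFixed(self, state, action):
        self.state = list(state)
        self.action = action

    def setRandomly(self):
        self.state = [random.random() for _ in range(self.stateDimension)]
        self.action = random.randrange(self.numActions)

    def isDifferent(self, other):
        return self.state != other.state or self.action != other.action

    def calculateDiff(self, other):
        # Assumed Euclidean distance in feature space; an infinite distance
        # for a mismatched action drives its membership grade to zero.
        if self.action != other.action:
            return float('inf')
        return sum((a - b) ** 2
                   for a, b in zip(self.state, other.state)) ** 0.5

    def getFeatureWidth(self):
        return self.featureWidth

    def getTheta(self):
        return self.theta

    def setTheta(self, theta):
        self.theta = theta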
def getV(self, state):
    # State value: grade-weighted sum of the winning prototype's theta
    # from each group.
    thetaSum = 0
    pIndex = [0] * self.numGroups
    pGrade = self.getPrototypeIndex(state, pIndex)
    for i in range(len(pIndex)):
        thetaSum += self.prototypes[pIndex[i]].getTheta() * pGrade[i]
    return thetaSum
def getQ(state, action):
    # Q(state, action): membership-weighted sum of all prototype thetas.
    tempPrototype = Prototype(numActions, stateDimension)
    tempPrototype.setFixed([state / float(maxState)], action)
    thetaSum = 0
    for prototype in prototypes:
        tempFeatureDiff = tempPrototype.calculateDiff(prototype)
        membershipGrade = exp(-(tempFeatureDiff * tempFeatureDiff)
                              / (2 * prototype.getFeatureWidth()))
        thetaSum += prototype.getTheta() * membershipGrade
    return thetaSum
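# For illustration, action selection over this Q approximation could be
# epsilon-greedy, as in the sketch below; the policy and the epsilon value
# are assumptions, not part of the original snippets.
def chooseAction(state, epsilon=0.1):
    # Explore with probability epsilon, otherwise act greedily on Q.
    if random.random() < epsilon:
        return random.randrange(numActions)
    qValues = [getQ(state, a) for a in range(numActions)]
    return qValues.index(max(qValues))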
def getPrototypeIndex(self, state, prototypeIndex):
    # For each group, find the prototype with the highest membership grade
    # at `state`. Winning indices are written into `prototypeIndex` in
    # place; the normalized grades are returned.
    tempPrototype = Prototype(1, self.stateDimension)
    tempPrototype.setFixed([state / float(self.maxState + 1)], 0)
    pGrade = [0] * self.numGroups
    for i in range(len(self.prototypes)):
        prototype = self.prototypes[i]
        tempFeatureDiff = tempPrototype.calculateDiff(prototype)
        # Unnormalized Gaussian grade; the full pdf would also divide by
        # sqrt(2 * prototype.getFeatureWidth() * pi).
        membershipGrade = exp(-(tempFeatureDiff * tempFeatureDiff)
                              / (2 * prototype.getFeatureWidth()))
        if pGrade[prototype.group] < membershipGrade:
            prototypeIndex[prototype.group] = i
            pGrade[prototype.group] = membershipGrade
    # Normalize the winning grades so they sum to 1.
    return [p / sum(pGrade) for p in pGrade]
def generatePrototypes():
    if isRandom:
        for i in range(numPrototypes):
            numDifferent = 0
            prototypes.append(Prototype(numActions, stateDimension))
            # Re-sample until the new prototype differs from every
            # earlier one.
            while numDifferent < i:
                if prototypes[i].isDifferent(prototypes[numDifferent]):
                    numDifferent += 1
                else:
                    prototypes[i].setRandomly()
                    numDifferent = 0
    else:
        # Spread prototypes evenly over the normalized state range.
        for i in range(numPrototypes):
            p = Prototype(numActions, stateDimension)
            p.setFixed([i / float(numPrototypes + 1)], 0)
            prototypes.append(p)
def generatePrototypes(self, isRandom):
    if isRandom:
        for i in range(self.numPrototypes):
            numDifferent = 0
            self.prototypes.append(Prototype(1, self.stateDimension))
            # Re-sample until the new prototype differs from every
            # earlier one.
            while numDifferent < i:
                if self.prototypes[i].isDifferent(self.prototypes[numDifferent]):
                    numDifferent += 1
                else:
                    self.prototypes[i].setRandomly()
                    numDifferent = 0
    else:
        # Assign groups round-robin and spread prototypes evenly over the
        # normalized state range.
        for i in range(self.numPrototypes):
            groupNum = i % self.numGroups
            p = Prototype(1, self.stateDimension, groupNum)
            p.setFixed([i / float(self.maxState + 1)], 0)
            self.prototypes.append(p)
def learn(state1, action1, reward, state2):
    # Alternative Q-learning target (disabled):
    # maxQ = -float('inf')
    # for a in range(numActions):
    #     tempQ = getQ(state2, a)
    #     if maxQ < tempQ:
    #         maxQ = tempQ
    # SARSA-style target with the next action fixed to 0.
    maxQ = getQ(state2, 0)
    preQ = getQ(state1, action1)
    # TD error.
    delta = reward + gamma * maxQ - preQ
    tempPrototype = Prototype(numActions, stateDimension)
    tempPrototype.setFixed([state1 / float(maxState)], action1)
    # Move every prototype's theta toward the target in proportion to its
    # membership grade, averaged over the number of prototypes.
    for prototype in prototypes:
        tempFeatureDiff = tempPrototype.calculateDiff(prototype)
        membershipGrade = exp(-(tempFeatureDiff * tempFeatureDiff)
                              / (2 * prototype.getFeatureWidth()))
        prototype.setTheta(prototype.getTheta()
                           + alpha * delta * membershipGrade / numPrototypes)
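# Illustrative wiring of the module-level pieces above into a training
# loop. The configuration values, the environment object `env`, and its
# reset()/step() API are assumptions, not part of the original code.
numActions, stateDimension, maxState = 2, 1, 100
numPrototypes, isRandom = 10, False
prototypes = []
gamma, alpha = 0.95, 0.1

generatePrototypes()
for episode in range(1000):
    state = env.reset()  # hypothetical environment handle
    for step in range(200):
        action = chooseAction(state)
        state2, reward, done = env.step(action)
        learn(state, action, reward, state2)
        state = state2
        if done:
            break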