Exemplo n.º 1
0
def getPrototypeDistribution(i):
	"""Return the theta-weighted membership grades of prototype ``i``.

	For every ``state`` in ``range(maxState)`` a temporary ``Prototype`` is
	fixed at the normalised state ``state/float(maxState)`` with action 0 and
	compared against ``prototypes[i]``.  The resulting membership grade is
	multiplied by that prototype's theta.

	Reads the module-level names ``prototypes``, ``maxState``, ``numActions``
	and ``stateDimension``.

	Returns a list with ``maxState`` entries, one per state.
	"""
	target = prototypes[i]

	def weightedGrade(stateIndex):
		# Probe prototype placed at the normalised state, action fixed to 0.
		probe = Prototype(numActions, stateDimension)
		probe.setFixed([stateIndex/float(maxState)], 0)
		diff = probe.calculateDiff(target)
		# NOTE(review): by operator precedence this is (-(diff*diff)/2) * width,
		# i.e. the feature width multiplies the exponent rather than dividing
		# it -- confirm this is the intended kernel.
		grade = float(exp(-(diff*diff)/2*target.getFeatureWidth()))
		return grade*target.getTheta()

	return [weightedGrade(stateIndex) for stateIndex in range(maxState)]
Exemplo n.º 2
0
def getQ(state, action):
	"""Return the approximated Q-value for ``(state, action)``.

	A temporary ``Prototype`` is fixed at the normalised state
	``state/float(maxState)`` with the given action.  Every entry of the
	module-level ``prototypes`` contributes its theta multiplied by its
	membership grade relative to that temporary prototype; the contributions
	are added up in iteration order.

	Returns the accumulated sum (the integer ``0`` when ``prototypes`` is
	empty).
	"""
	probe = Prototype(numActions, stateDimension)
	probe.setFixed([state/float(maxState)], action)

	total = 0
	for candidate in prototypes:
		diff = probe.calculateDiff(candidate)
		# NOTE(review): evaluates as (-(diff*diff)/2) * width -- the width
		# multiplies the exponent; confirm that is intended.
		exponent = -(diff*diff)/2*candidate.getFeatureWidth()
		grade = float(exp(exponent))
		# Explicit accumulation (not sum()) keeps plain left-to-right addition.
		total += candidate.getTheta() * grade

	return total
Exemplo n.º 3
0
	def getPrototypeIndex(self, state, prototypeIndex):
		"""Pick the best-matching prototype of every group for ``state``.

		A temporary ``Prototype`` is fixed at ``state/float(self.maxState + 1)``
		with action 0 and compared against each entry of ``self.prototypes``.
		For every group the prototype with the strictly largest membership
		grade wins; on ties the earliest prototype is kept.

		Args:
			state: state value; it is divided by ``self.maxState + 1`` before use.
			prototypeIndex: mutable sequence indexed by group.  It is updated
				in place with the index (into ``self.prototypes``) of the
				winning prototype of each group.  Entries of groups whose
				grades never exceed 0 are left untouched.

		Returns:
			A list of ``self.numGroups`` values: each group's best grade
			divided by the sum of all groups' best grades.

		Raises:
			ZeroDivisionError: if no prototype reaches a grade above 0 (and
				``self.numGroups`` is non-zero), because the total is then 0.
		"""
		probe = Prototype(1, self.stateDimension)
		probe.setFixed([state/float(self.maxState + 1)], 0)
		bestGrade = [0]*self.numGroups

		for i, prototype in enumerate(self.prototypes):
			diff = probe.calculateDiff(prototype)
			# NOTE(review): evaluates as (-(diff*diff)/2) * width -- the width
			# multiplies the exponent; confirm that is intended.
			grade = float(exp(-(diff*diff)/2*prototype.getFeatureWidth()))
			group = prototype.group
			if bestGrade[group] < grade:
				prototypeIndex[group] = i
				bestGrade[group] = grade

		# Compute the normaliser once instead of once per returned element.
		total = sum(bestGrade)
		return [grade/total for grade in bestGrade]
Exemplo n.º 4
0
    def getPrototypeIndex(self, state, prototypeIndex):
        """Pick the best-matching prototype of every group for ``state``.

        A temporary ``Prototype`` is fixed at ``state / float(self.maxState + 1)``
        with action 0 and compared against each entry of ``self.prototypes``.
        For every group the prototype with the strictly largest membership
        grade wins; on ties the earliest prototype is kept.

        Args:
            state: state value; it is divided by ``self.maxState + 1`` before use.
            prototypeIndex: mutable sequence indexed by group.  It is updated
                in place with the index (into ``self.prototypes``) of the
                winning prototype of each group.  Entries of groups whose
                grades never exceed 0 are left untouched.

        Returns:
            A list of ``self.numGroups`` values: each group's best grade
            divided by the sum of all groups' best grades.

        Raises:
            ZeroDivisionError: if no prototype reaches a grade above 0 (and
                ``self.numGroups`` is non-zero), because the total is then 0.
        """
        probe = Prototype(1, self.stateDimension)
        probe.setFixed([state / float(self.maxState + 1)], 0)
        best_grade = [0] * self.numGroups

        for i, prototype in enumerate(self.prototypes):
            diff = probe.calculateDiff(prototype)
            # NOTE(review): evaluates as (-(diff * diff) / 2) * width -- the
            # width multiplies the exponent; confirm that is intended.
            grade = float(exp(-(diff * diff) / 2 * prototype.getFeatureWidth()))
            group = prototype.group
            if best_grade[group] < grade:
                prototypeIndex[group] = i
                best_grade[group] = grade

        # Compute the normaliser once instead of once per returned element.
        total = sum(best_grade)
        return [grade / total for grade in best_grade]
Exemplo n.º 5
0
def learn(state1, action1, reward, state2):
	"""Apply one temporal-difference update to every prototype's theta.

	The error term is ``reward + gamma*getQ(state2, 0) - getQ(state1, action1)``.
	Each entry of the module-level ``prototypes`` then has its theta increased
	by ``alpha * error * grade / numPrototypes``, where ``grade`` is its
	membership grade relative to a temporary prototype fixed at
	``(state1/float(maxState), action1)``.

	Nothing is returned; the prototypes are modified through ``setTheta``.
	"""
	# NOTE(review): the bootstrap value always uses action 0 in state2.  The
	# original was labelled "sarsa" and carried a disabled max-over-actions
	# variant -- confirm that action 0 is really what is wanted here.
	nextQ = getQ(state2, 0)
	currentQ = getQ(state1, action1)
	tdError = reward + gamma*nextQ - currentQ

	probe = Prototype(numActions, stateDimension)
	probe.setFixed([state1/float(maxState)], action1)

	for candidate in prototypes:
		diff = probe.calculateDiff(candidate)
		# NOTE(review): evaluates as (-(diff*diff)/2) * width -- the width
		# multiplies the exponent; confirm that is intended.
		grade = float(exp(-(diff*diff)/2*candidate.getFeatureWidth()))
		updatedTheta = candidate.getTheta() + alpha * tdError * grade/numPrototypes
		candidate.setTheta(updatedTheta)