def exp3(self, numActions, gamma, rewardMin=0, rewardMax=1):
        #weights = [1.0] * numActions

        t = 0
        while True:
            probabilityDistribution = distr(self.weights, gamma)
            choice = draw(probabilityDistribution)
            self.choiceFreq[choice] += 1
            #put choice in the queue
            self.choice_queue.put(choice)
            self.choice_queue.join()
            #get reward from queue
            theReward = self.reward_queue.get()
            self.reward_queue.task_done()
            #theReward = reward(choice, t)
            scaledReward = (theReward - rewardMin) / (rewardMax - rewardMin)  # rewards scaled to 0,1

            # Empirical pick frequency of this arm (only needed by the
            # commented-out frequency-based estimator below).
            probChoice = float(self.choiceFreq[choice] + 1) / (t + 1)
            #estimatedReward = 1.0 * scaledReward / probabilityDistribution[choice]
            #estimatedReward = (1.0 * scaledReward) / probChoice

            # Estimator actually used here: scale the reward by the distance
            # since this arm was last chosen.
            estimatedReward = 1.0 * scaledReward * self.distanceLastchoice(t, choice)
            self.weights[choice] *= math.exp(estimatedReward * gamma / numActions)  # important that we use estimated reward here!

            yield choice, theReward, estimatedReward, self.weights
            t = t + 1
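
The exp3 examples on this page all call two helper functions, distr and draw, that are not shown here, and they rely on a module-level import math. A minimal sketch of what these helpers typically look like in EXP3 implementations (assumed for illustration, not taken from these projects): distr mixes the normalized weight vector with uniform exploration controlled by gamma, and draw samples an index from the resulting distribution.

import random


def distr(weights, gamma=0.0):
    # p_i = (1 - gamma) * w_i / sum(w) + gamma / K: exploit the current weights
    # but keep a gamma-sized share of uniform exploration.
    theSum = float(sum(weights))
    return tuple((1.0 - gamma) * (w / theSum) + (gamma / len(weights))
                 for w in weights)


def draw(probabilityDistribution):
    # Sample an index i with probability probabilityDistribution[i].
    choice = random.uniform(0, sum(probabilityDistribution))
    index = 0
    for probability in probabilityDistribution:
        choice -= probability
        if choice <= 0:
            return index
        index += 1
    return len(probabilityDistribution) - 1  # guard against floating-point round-off
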
 def exp3(self, numActions, gamma, rewardMin=0, rewardMax=1):
    t = 0
    while True:
       probabilityDistribution = distr(self.weights, gamma)
       choice = draw(probabilityDistribution)
       self.choiceFreq[choice]+=1
       #put choice in the queue
       self.choice_queue.put(choice)
       self.choice_queue.join()
       #get reward from queue
       theReward = self.reward_queue.get()
       self.reward_queue.task_done()
       #theReward = reward(choice, t)
       
       scaledReward = (theReward - rewardMin) / (rewardMax - rewardMin) # rewards scaled to 0,1
       
       # Case 1: frequency-based estimate (unused)
       #probChoice = float(self.choiceFreq[choice] + 1) / (t + 1)
       #estimatedReward = (1.0 * scaledReward) / probChoice

       # Case 2: standard EXP3 importance-weighted estimate (unused)
       #estimatedReward = 1.0 * scaledReward / probabilityDistribution[choice]

       # Case 3: distance-based estimate, the variant used here
       estimatedReward = 1.0 * scaledReward * self.distanceLastchoice(t, choice)
       
       self.weights[choice] *= math.exp(estimatedReward * gamma / numActions) # important that we use estimated reward here!
       yield choice, theReward, estimatedReward, self.weights
       t = t + 1
Example #3
def exp3(numActions, reward, gamma, rewardMin = 0, rewardMax = 1):
   weights = [1.0] * numActions

   t = 0
   while True:
      probabilityDistribution = distr(weights, gamma)
      choice = draw(probabilityDistribution)
      theReward = reward(choice, t)
      scaledReward = (theReward - rewardMin) / (rewardMax - rewardMin) # rewards scaled to 0,1

      estimatedReward = 1.0 * scaledReward / probabilityDistribution[choice]
      weights[choice] *= math.exp(estimatedReward * gamma / numActions) # important that we use estimated reward here!

      yield choice, theReward, estimatedReward, weights
      t = t + 1
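
The comment "important that we use estimated reward here!" refers to the importance-weighted estimator that makes EXP3 correct under bandit feedback: only the pulled arm's reward is observed, so it is divided by the probability of pulling that arm, which keeps the estimate unbiased across all arms. In the usual EXP3 notation this is (a sketch of the standard update, not taken from these projects):

\[
\hat{x}_t(i) = \begin{cases} x_t(i)/p_t(i), & i = i_t \\ 0, & i \neq i_t, \end{cases}
\qquad
w_{t+1}(i) = w_t(i)\,\exp\!\bigl(\gamma\,\hat{x}_t(i)/K\bigr)
\]

In the code above, p_t(i) is probabilityDistribution[choice], x_t(i) is scaledReward, and K is numActions; only the chosen arm's weight is updated, so the zero case never needs to be written out.
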
Example #4
File: exp3.py  Project: zn16/exp3
def exp3(numActions, reward, gamma, rewardMin=0, rewardMax=1):
    weights = [1.0] * numActions

    t = 0
    while True:
        probabilityDistribution = distr(weights, gamma)
        choice = draw(probabilityDistribution)
        theReward = reward(choice, t)
        scaledReward = (theReward - rewardMin) / (rewardMax - rewardMin)  # rewards scaled to 0,1

        estimatedReward = 1.0 * scaledReward / probabilityDistribution[choice]
        weights[choice] *= math.exp(estimatedReward * gamma / numActions)  # important that we use estimated reward here!

        yield choice, theReward, estimatedReward, weights
        t = t + 1
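
A minimal way to run one of these generators end to end, assuming the exp3, distr, and draw definitions above; the reward function, the payoff probabilities, and gamma = 0.07 below are made up for illustration.

import random

numActions = 3
payoffs = [0.2, 0.5, 0.8]  # hypothetical Bernoulli success rate of each arm


def reward(choice, t):
    # 0/1 reward for the chosen arm; the round index t is unused here.
    return 1 if random.random() < payoffs[choice] else 0


rounds = 10000
totalReward = 0
for t, (choice, theReward, estimatedReward, weights) in enumerate(
        exp3(numActions, reward, gamma=0.07)):
    totalReward += theReward
    if t + 1 >= rounds:
        break

print("average reward over", rounds, "rounds:", totalReward / float(rounds))
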
 def bidding(self):
     # Sample a bid index from the probability distribution pi and convert it
     # to an actual bid on the eps-spaced bid grid.
     bid = draw(self.pi)
     return bid * self.eps