示例#1
0
文件: Bandit.py 项目: HTCode/SimpleML
	def choose_pursuit(self):
		"""Pursuit method: pull the action-probability vector toward the
		currently greedy arm, then sample an arm from it.

		Side effect: rebinds self.WE to the updated probability vector.
		Returns the index of the chosen arm.
		"""
		# Empirical mean reward of each arm so far.
		avg = [np.mean(hist) for hist in self.rewards]
		# Greedy arm; list.index keeps the original first-max tie-breaking.
		greedy = avg.index(max(avg))
		updated = []
		for idx, p in enumerate(self.WE):
			# Move probability a step of size epsilon toward 1 for the
			# greedy arm and toward 0 for every other arm.
			target = 1. if idx == greedy else 0.
			updated.append(p + self.epsilon * (target - p))
		self.WE = updated
		return weighted_choice(range(self.K), self.WE)
示例#2
0
文件: Bandit.py 项目: HTCode/SimpleML
	def choose_reinforcement(self):
		"""Reinforcement-comparison selection: softmax over the preference
		weights self.WE, then sample an arm from the resulting distribution.

		Returns the index of the chosen arm.
		"""
		# Shift by the max preference before exponentiating so math.exp
		# cannot overflow; the softmax probabilities are unchanged.
		peak = max(self.WE)
		sco = [math.exp(p - peak) for p in self.WE]
		# Hoist the normalizer: the original evaluated sum(sco) inside the
		# comprehension for every element, making normalization O(K^2).
		total = sum(sco)
		sco = [s / total for s in sco]
		id_a = weighted_choice(range(self.K), sco)
		return id_a
示例#3
0
文件: Bandit.py 项目: HTCode/SimpleML
	def choose_boltzmann(self):
		"""Boltzmann (softmax) exploration: arms are weighted by
		exp(mean_reward / epsilon), where self.epsilon acts as the
		temperature. Returns the index of the chosen arm.
		"""
		means = [np.mean(L) for L in self.rewards]
		# Subtract the max before exponentiating: avoids math.exp overflow
		# at small temperatures without changing the distribution.
		peak = max(means)
		sco = [math.exp((m - peak) / self.epsilon) for m in means]
		# Normalize with the sum computed once; the original re-summed the
		# whole list for every element (O(K^2)).
		total = sum(sco)
		sco = [s / total for s in sco]
		id_a = weighted_choice(range(self.K), sco)
		return id_a
示例#4
0
文件: Bandit.py 项目: HTCode/SimpleML
	def choose_EXP3(self):
		"""EXP3: mix the normalized weight distribution with the uniform
		distribution, using self.epsilon as the exploration rate, then
		sample an arm. Returns the index of the chosen arm.
		"""
		# Hoist the loop invariants: the original recomputed sum(self.WE)
		# and epsilon/K once per arm, making this O(K^2).
		total = sum(self.WE)
		uniform = self.epsilon * 1. / self.K
		P = [(1. - self.epsilon) * w / total + uniform for w in self.WE]
		id_a = weighted_choice(range(self.K), P)
		return id_a
		return id_a