def play():
	'''Play one game, giving every arm an equal slice of the starting money.

	Arm ``k`` (1-based) is played for as long as the balance is at least
	``total_money * (arm_num - k) / arm_num``. Once the balance falls below
	that threshold the next arm takes over; the threshold of the last arm
	is 0.

	Returns:
		1 if the balance reaches ``bandit.celling()``, 0 if the balance
		drops to zero or below, and 0 as well if no arm was ever played
		(for example when the bandit reports no arms).
	'''
	bandit = BanditGame()
	total_money = bandit.money()
	money = total_money
	arm_num = bandit.arm_num()
	for arm in range(1, arm_num+1):
		# Balance below which this arm has used up its share; it does not
		# change while the arm is being played, so compute it once.
		threshold = 1.0*total_money*(arm_num-arm)/arm_num
		while money >= threshold:
			# Only the third element of the result (the new balance) is used.
			_, _, money = bandit.play(arm)
			if money <= 0:
				return 0
			if money >= bandit.celling():
				return 1
	# No play produced a result: report "not won" instead of falling off
	# the end with an implicit None.
	return 0
def play():
	'''Play one game, choosing arms by sampling per-arm Beta priors.

	Bookkeeping per arm (0-based here, passed 1-based to
	``play_one_till_win``):
		prior[arm] -- ``[1 + accumulated gain, 1 + accumulated spend]``
		win[arm]   -- number of rounds whose gain was positive
		total[arm] -- accumulated ``spend + 1`` over all rounds

	Arm selection for each round:
		* arms with at least 10 wins whose gain side is below the spend
		  side are skipped;
		* arms with at least 10 wins whose gain side exceeds the spend side
		  are "mature"; if any exist, the mature arm with the highest
		  ``(gain side - spend side) / total`` is played;
		* otherwise the arm with the largest draw from
		  ``beta.rvs(gain side, spend side)`` is played;
		* if every arm was skipped, the arm with the highest
		  ``(gain side - spend side) / total`` over all arms is played.

	Returns:
		1 if the balance reaches ``bandit.celling()``, otherwise 0.
	'''
	bandit = BanditGame()
	money = bandit.money()
	arm_num = bandit.arm_num()
	# Wins required before an arm is judged on its record instead of sampled.
	min_wins = 10
	prior = [[1, 1] for _ in range(arm_num)]
	win = [0] * arm_num
	total = [0] * arm_num

	def find_best_one(selected_arms):
		'''Return the arm in selected_arms with the highest average net gain.'''
		best_e = float('-inf')
		best_arm = None
		for arm in selected_arms:
			current_e = (prior[arm][0] - prior[arm][1])*1.0/total[arm]
			if current_e > best_e:
				best_e = current_e
				best_arm = arm
		return best_arm

	def sample_an_arm():
		'''Pick the arm to play next (see the selection rules above).'''
		max_sample = float('-inf')
		best_arm = None
		mature_ones = []
		for arm in range(arm_num):
			gained, spent = prior[arm]
			if win[arm] >= min_wins:
				if gained < spent:
					# Enough evidence that this arm loses money.
					continue
				if gained > spent:
					mature_ones.append(arm)
					continue
			# beta is presumably scipy.stats.beta -- it is imported outside
			# this block.
			r = beta.rvs(gained, spent)
			if r > max_sample:
				max_sample = r
				best_arm = arm
		if mature_ones:
			return find_best_one(mature_ones)
		if best_arm is not None:
			return best_arm
		# Every arm was skipped as a known loser: take the least bad one.
		return find_best_one(range(arm_num))

	while money > 0 and money < bandit.celling():
		arm_chosen = sample_an_arm()
		spend, gain = play_one_till_win(arm_chosen+1, bandit)
		money = bandit.money()
		prior[arm_chosen][1] += spend
		total[arm_chosen] += (spend+1)
		if gain > 0:
			prior[arm_chosen][0] += gain
			win[arm_chosen] += 1
	# The loop also stops when the balance equals the ceiling exactly, so
	# the win test has to be inclusive.
	if money >= bandit.celling():
		return 1
	return 0
def play():
	'''Play one game, choosing each arm by sampling per-arm Beta priors.

	``prior[arm]`` holds ``[1 + accumulated gain, 1 + accumulated spend]``
	for every arm (0-based here, passed 1-based to ``play_one_till_win``).
	Each round draws one value per arm from ``beta.rvs`` with those two
	parameters and plays the arm with the largest draw.

	Returns:
		1 if the balance reaches ``bandit.celling()``, otherwise 0.
	'''
	bandit = BanditGame()
	money = bandit.money()
	arm_num = bandit.arm_num()
	prior = [[1, 1] for _ in range(arm_num)]

	def sample_an_arm():
		'''Return the arm whose Beta draw is the largest this round.'''
		max_sample = float('-inf')
		best_arm = None
		for arm in range(arm_num):
			# beta is presumably scipy.stats.beta -- it is imported outside
			# this block.
			r = beta.rvs(prior[arm][0], prior[arm][1])
			if r > max_sample:
				max_sample = r
				best_arm = arm
		return best_arm

	while money > 0 and money < bandit.celling():
		arm_chosen = sample_an_arm()
		spend, gain = play_one_till_win(arm_chosen+1, bandit)
		money = bandit.money()
		prior[arm_chosen][1] += spend
		if gain > 0:
			prior[arm_chosen][0] += gain
	# The loop also stops when the balance equals the ceiling exactly, so
	# the win test has to be inclusive.
	if money >= bandit.celling():
		return 1
	return 0
class PlayBandit(object):
	'''Explore-then-exploit player for a ``BanditGame``.

	Per-arm statistics are kept in a dict mapping the 1-based arm number to
	``[total, win, profit]``: number of plays, number of plays with a
	positive gain, and the sum of the gains returned by the bandit.
	'''

	# Winning plays required before an arm's statistics are considered
	# sufficient.
	_MIN_WINS = 10

	def __init__(self):
		self.__bandit = BanditGame()
		self.__celling = self.__bandit.celling()
		self.__arm_num = self.__bandit.arm_num()
		self.__stat = {}
		for arm in range(1, self.__arm_num+1):
			self.__stat[arm] = [0, 0, 0] # total, win, profit

	def __expected_profit(self, arm):
		'''Return the average profit per play of arm, or -inf if unplayed.'''
		plays, _, profit = self.__stat[arm]
		if plays == 0:
			# An arm without data must never look better than a played one
			# and must not trigger a division by zero.
			return float('-inf')
		# 1.0* forces true division even when both counters are integers.
		return 1.0 * profit / plays

	def play_chosen(self, arm):
		'''Play the chosen arm till win or lose.

		Statistics are not updated by these plays.
		'''
		money = self.__bandit.money()
		while money > 0 and money < self.__celling:
			_, _, money = self.__bandit.play(arm)

	def find_best_and_play(self):
		'''Play the arm with the highest average profit till win or lose.

		Unplayed arms are ignored; ties go to the lowest arm number, and
		arm 1 is used when no arm has been played yet.
		'''
		best_e = float('-inf')
		best_arm = 1
		for arm in range(1, self.__arm_num+1):
			current_e = self.__expected_profit(arm)
			if current_e > best_e:
				best_e = current_e
				best_arm = arm
		self.play_chosen(best_arm)

	def has_positive_ones(self):
		'''Return True if some arm has enough wins and a positive average profit.'''
		for arm in range(1, self.__arm_num+1):
			if self.__stat[arm][1] >= self._MIN_WINS and self.__expected_profit(arm) > 0:
				return True
		return False

	def find_insufficient_ones(self):
		'''Return the arms that have fewer wins than required, in arm order.'''
		return [arm for arm in range(1, self.__arm_num+1)
				if self.__stat[arm][1] < self._MIN_WINS]

	def play_budget_on_insufficient_arms(self, insufficient_arms):
		'''Spend an equal share of the current money on each given arm.

		An arm is played until it has lost its share, the ceiling is
		reached, or it has collected enough wins while its accumulated
		profit is negative. Statistics are updated after every play.
		'''
		if not insufficient_arms:
			# Nothing to explore; also avoids dividing by zero below.
			return
		money = self.__bandit.money()
		# 1.0* keeps the share fractional even for an integer balance.
		budgit_per_arm = 1.0 * money / len(insufficient_arms)
		for arm in insufficient_arms:
			# Lowest balance this arm may reach before the next arm takes over.
			lower_bound = max(money - budgit_per_arm, 0)
			stat = self.__stat[arm]
			while money > lower_bound and money < self.__celling:
				_, gain, money = self.__bandit.play(arm)
				stat[0] += 1
				stat[2] += gain
				if gain > 0:
					stat[1] += 1
				if stat[1] >= self._MIN_WINS and stat[2] < 0:
					# Enough evidence that this arm loses money.
					break

	def play(self):
		'''Play one game and return 1 on a win, 0 when the money is gone.

		While the game is running: exploit the best arm if one is known to
		be profitable, otherwise explore the arms that still lack wins, and
		if none lack wins play the best arm anyway.
		'''
		money = self.__bandit.money()
		while money > 0 and money < self.__celling:
			if self.has_positive_ones():
				self.find_best_and_play()
			else:
				insufficient_arms = self.find_insufficient_ones()
				if insufficient_arms:
					self.play_budget_on_insufficient_arms(insufficient_arms)
				else:
					self.find_best_and_play()
			money = self.__bandit.money()
		# Check win or lose and return
		if money <= 0:
			return 0
		return 1