Пример #1
0
def play_game(state_list, action_list, Qtable):
    """Play one game, choosing each action through the ``egreedy`` module.

    The loop is a small state machine driven by ``flag``:

    * ``0`` -- observe the board and pick an action for the current piece.
    * ``1`` -- keep applying the chosen action through ``doaction``.
    * ``2`` -- push the piece down until the game reports an update.

    Args:
        state_list: Not read by this function; kept for caller compatibility.
        action_list: Not read by this function; kept for caller compatibility.
        Qtable: Not read by this function; kept for caller compatibility.

    Returns:
        None. The function runs until ``pt.alive()`` is false and then calls
        ``pt.quit()``.
    """
    step = 0  # total number of main-loop iterations
    old_state = []
    new_state = []
    flag = 0
    reset()
    pt.init(0.5)

    # Main loop.
    while pt.alive():
        # Every 180 iterations (while idle) take a fresh snapshot of the game.
        if step % 180 == 0 and flag == 0:
            new_board = pt.getBoard()
            # Collapse cell values 1..7 to 1; 0 and values >= 8 are untouched.
            # NOTE(review): this modifies the list returned by pt.getBoard()
            # in place -- confirm that getBoard() returns a copy.
            for row in new_board:
                for j, cell in enumerate(row):
                    if 0 < cell < 8:
                        row[j] = 1
            piece = pt.getPiece()
            new_state = [new_board, piece]

        # A changed state means a new decision is needed.
        if new_state != old_state and flag == 0:
            action, _ = egreedy.search_action(new_state)
            if action == []:
                # search_action found nothing; fall back to eGreedy.
                action = egreedy.eGreedy(new_state)
            old_state = new_state
            flag = 1

        # Apply the chosen action on every 10th iteration.
        if step % 10 == 0 and flag == 1:
            flag = doaction(action, piece)
            # On every 100th iteration move on to the drop phase regardless
            # of what doaction() returned.
            if step % 100 == 0:
                flag = 2

        # Advance to the next frame.
        pt.loop()
        if flag == 2:
            print(pt.param.flag['update'])
            pt.move('down')
            if pt.param.flag['update']:
                print("update")
                pt.loop()
                flag = 0
            if pt.param.flag['gameover']:
                # Loop termination itself is decided by pt.alive() above.
                flag = 0
        step += 1

    # Finish.
    pt.quit()
Пример #2
0
def qLearning(maxsteps = 100, rate_a = 0.6, rate_g = 0.3):
    """Run ``maxsteps`` training episodes and return the learned tables.

    Each episode plays one game until the ``gameover`` flag is set. A state
    is the pair ``[board, piece]``: the board after collapsing occupied
    cells to 1, and the currently falling tetrimino. For every new state an
    action is chosen with ``egreedy.eGreedy``, recorded, and passed to
    ``updateQ.updateQ``.

    Args:
        maxsteps: Number of episodes to run.
        rate_a: Base value multiplied by the ``change_rate`` result to get
            ``alpha``.
        rate_g: Base value multiplied by the ``change_rate`` result to get
            ``gamma``.

    Returns:
        The tuple ``(lists.state_list, lists.action_list, lists.Qtable)``:
        the state list (board plus falling piece), the action list (the
        original notes describe entries as [horizontal moves, rotations]),
        and the table of Q values.
    """
    steps = 0
    flag = 0

    while steps < maxsteps:
        # NOTE(review): with integer arguments under Python 2 this division
        # is always 0 inside the loop; confirm whether a fractional progress
        # ratio was intended.
        rate = change_rate(steps / maxsteps, rate_a, rate_g)
        alpha, gamma = rate_a * rate, rate_g * rate
        pt.init(0.5)
        alive = 1
        old_board = []
        board = []

        print("step: %s start" % steps)
        while alive:  # until game over
            if flag == 0:
                board = pt.getBoard()
                # NOTE(review): if the board compares equal here nothing in
                # this iteration advances the game -- confirm this cannot
                # spin forever.
                if old_board != board:
                    # Collapse every cell that is neither 0 nor 8 to 1.
                    for row in board:
                        for j, cell in enumerate(row):
                            if cell != 0 and cell != 8:
                                row[j] = 1
                    piece = pt.getPiece()
                    state = [board, piece]

                    # Explore / exploit (softmax would also work; epsilon
                    # greedy for now).
                    action = egreedy.eGreedy(state)

                    # Update state_list, action_list and the Q table.
                    if action not in lists.action_list:
                        lists.action_list.append(action)
                    add_state(state)
                    updateQ.updateQ(state, action, alpha, gamma)

                    # Remember the board and switch to executing the action.
                    old_board = board
                    flag = 1

            # Prepare the next state.
            if flag == 1:
                attempts = 0
                while flag != 2:
                    flag = doaction(action, piece)
                    attempts += 1
                    # Give up after 100 calls and move on to the drop phase.
                    if attempts % 100 == 0:
                        flag = 2

            elif flag == 2:
                pt.loop()
                pt.drop()
                if pt.param.flag['update']:
                    pt.loop()
                    flag = 0
                if pt.param.flag['gameover']:
                    alive = flag = 0

        steps += 1
        reset()
        # Drop every empty row from the Q table in one in-place pass.
        lists.Qtable[:] = [row for row in lists.Qtable if row != []]

    return lists.state_list, lists.action_list, lists.Qtable
Пример #3
0
    trials = 50

    plays = 1000
    samples = 20

    rewards1 = np.zeros((trials, plays))
    rewards2 = np.zeros((trials, plays))
    rewards3 = np.zeros((trials, plays))

    optimal_rewards = np.zeros((trials, plays))

    #env = bern_bandit.BernBandits()
    for t in range(trials):
        #env = bandits.Bandits()
        env = bern_bandit.BernBandits()
        strat1 = egreedy.eGreedy(env.n)
        strat2 = ucb.UCB(env.n)
        strat3 = thompson.Thompson(env.n)

        for i in range(plays):
            arm1 = strat1.action()
            arm2 = strat2.action()
            arm3 = strat3.action()

            r1 = env.pull(arm1)
            r2 = env.pull(arm2)
            r3 = env.pull(arm3)

            strat1.update(arm1, r1)
            strat2.update(arm2, r2)
            strat3.update(arm3, r3)