def __init__(self):
    """Initialise the parent class, the Othello environment, the UI and state.

    The statements run in a fixed order: the environment is created before
    ``initUI()`` is called, and ``i`` / ``Q`` are assigned only afterwards.
    """
    super().__init__()

    # Game environment owned by this instance.
    self.env = Env.OthelloEnv()

    # Build the interface (defined elsewhere on this class).
    self.initUI()

    # Integer counter starting at zero.
    # NOTE(review): its meaning is not visible here -- presumably a move or
    # step index; confirm against the methods that read it.
    self.i = 0

    # Reference (not a copy) to the ``Q`` object exposed by the ``test``
    # module, so changes made through ``self.Q`` are visible via ``test.Q``.
    self.Q = test.Q
def f(state, action):
    """Apply ``action`` to ``state`` in a fresh environment.

    A new ``Env.OthelloEnv`` is created, its ``state`` attribute is set to
    the given ``state`` and a single ``_step(action)`` is performed.

    Args:
        state: Board state assigned to the environment's ``state`` attribute.
            NOTE(review): the object is assigned directly, not copied, so if
            ``_step`` mutates the state in place the caller's object changes
            too -- confirm against ``OthelloEnv._step``.
        action: Action forwarded unchanged to ``_step``.

    Returns:
        A ``(next_state, done)`` tuple; the middle element returned by
        ``_step`` (bound to ``r`` in the original code) is discarded.
    """
    sandbox = Env.OthelloEnv()
    sandbox.state = state
    step_result = sandbox._step(action)
    next_state, _reward, done = step_result
    return next_state, done
def main(num_games=1000, num_episodes=10):
    """Estimate per-square values, then benchmark a greedy policy against random.

    Steps:
      1. Run ``mc_prediction`` with ``random_policy`` to obtain a value for
         each of the 64 board squares.
      2. Print those values as an 8x8 grid.
      3. Play ``num_games`` games in which the player with ``flag == 1``
         always picks the legal square with the highest value and the player
         with ``flag == -1`` moves randomly, then print the fraction of games
         in which the final board sum is positive.

    Args:
        num_games: Number of benchmark games to play (must be >= 1).
        num_episodes: Number of episodes passed to ``mc_prediction``.

    Raises:
        ValueError: If ``num_games`` is smaller than 1.
    """
    if num_games < 1:
        raise ValueError("num_games must be at least 1")

    env = Env.OthelloEnv()
    Value = mc_prediction(random_policy, env, num_episodes=num_episodes)

    # Pin the four centre squares (rows 3-4, columns 3-4 of the 8x8 board)
    # to a value of 1.
    for centre_square in (27, 28, 35, 36):
        Value[centre_square] = 1

    value_list = [Value[square] for square in range(64)]

    # Existing hand-tuned matrix obtained from experience (alternative to the
    # Monte Carlo estimate above):
    # value_list = [10, -9,  8,  4,  4,  8, -9, 10,
    #               -9, -9, -4, -3, -3, -4, -9, -9,
    #                8, -4,  8,  2,  2,  8, -4,  8,
    #                4,  3,  2,  1,  1,  2,  3,  4,
    #                4,  3,  2,  1,  1,  2,  3,  4,
    #                8, -4,  8,  2,  2,  8, -4,  8,
    #               -9, -9, -4, -3, -3, -4, -9, -9,
    #               10, -9,  8,  4,  4,  8, -9, 10]

    # Print the value matrix, one board row per line.
    for row_start in range(0, 64, 8):
        for value in value_list[row_start:row_start + 8]:
            print(value, end=' ')
        print()
    print()

    # Start playing.
    def mid_policy(state, flag):
        """Return ``[flag, square]`` for the highest-valued legal square.

        Returns ``[0, 0]`` when ``judge`` reports no moves for ``flag``.
        """
        legal_moves = judge(state, flag)
        if len(legal_moves) == 0:
            return [0, 0]
        # Score each candidate by the value of the square it would occupy.
        # The previous code looked up ``value_list[i]`` with ``i`` being the
        # position in the candidate list, i.e. it scored squares 0..l-1
        # instead of the legal squares themselves.
        # NOTE(review): assumes ``judge`` yields flat 0..63 square indices,
        # consistent with how ``Value`` and the state are indexed here.
        # ``max`` returns the first maximal element, matching the original
        # ``list.index(max(...))`` tie-break.
        best_square = max(legal_moves, key=lambda square: value_list[square])
        return [flag, best_square]

    def play_game(game_env):
        """Play one full game; return 1 if the final board sum is > 0, else 0."""
        state = game_env._reset()
        flag = 1
        done = False
        while not done:
            # The flag == 1 side plays greedily, the other side randomly.
            if flag == 1:
                action = mid_policy(state, flag)
            else:
                action = random_policy(state, flag)
            state, _reward, done = game_env._step(action)
            flag = -flag

        # A positive sum over all 64 squares counts as a win.
        # NOTE(review): presumably squares hold +1 / -1 / 0 per owner, so a
        # positive sum means the flag == 1 side has more discs -- confirm
        # against OthelloEnv.
        total = sum(state[square] for square in range(64))
        return 1 if total > 0 else 0

    env = Env.OthelloEnv()
    wins = sum(play_game(env) for _ in range(num_games))
    print(wins / num_games)
import sys
import random
from collections import defaultdict
import Othello as Env
from Othello import judge


def random_policy(state, flag):
    """Pick one of the moves reported by ``judge`` uniformly at random.

    Args:
        state: Board state forwarded to ``judge``.
        flag: Player marker forwarded to ``judge`` and echoed in the result.

    Returns:
        ``[flag, move]`` for a randomly chosen entry of ``judge(state, flag)``,
        or ``[0, 0]`` when that collection is empty.
    """
    candidates = judge(state, flag)
    candidate_count = len(candidates)
    if candidate_count == 0:
        return [0, 0]
    chosen_index = random.randint(0, candidate_count - 1)
    return [flag, candidates[chosen_index]]


def f(state, action):
    """Apply ``action`` to ``state`` in a fresh environment.

    Returns:
        A ``(next_state, done)`` tuple; the middle element returned by
        ``_step`` is discarded.
    """
    sandbox = Env.OthelloEnv()
    # NOTE(review): the state object is assigned directly, not copied.
    sandbox.state = state
    next_state, _reward, done = sandbox._step(action)
    return next_state, done


# Module-level smoke run: reset an environment, apply one random move for
# player 1 through ``f`` and print the resulting state and done flag.
env = Env.OthelloEnv()
env._reset()
ss, dd = f(env.state, random_policy(env.state, 1))
print(ss)
print(dd)