class Environment(object):
    """Episode wrapper around one ``TwentyFortyEight`` game.

    Exposes ``reset`` / ``step`` / ``seed``.  ``S`` holds the wrapped game
    object and ``score`` mirrors the latest value of ``S.score1()``.
    """

    def __init__(self):
        """Create the game, build its tables and spawn the first tile."""
        game = TwentyFortyEight()
        game.make_tables()
        self.S = game
        self.score = 0
        game.new_tile()
        print("New Episode")

    def reset(self):
        """Zero the board and score counters, spawn a tile, return the state.

        Returns:
            Whatever ``S.vectorize_state()`` produces for the fresh board.
        """
        game = self.S
        game.score = 0
        game.cells = 0
        self.score = 0
        game.new_tile()
        return game.vectorize_state()

    def step(self, action):
        """Play one move and report what happened.

        Args:
            action: Move index; it is passed to ``S.move`` as ``action + 1``.

        Returns:
            A ``(state, reward, done)`` tuple where ``state`` is
            ``S.vectorize_state()``, ``reward`` is the change in
            ``S.score1()`` caused by the move, and ``done`` is ``True`` once
            ``S.canMove()`` reports that no move is left.
        """
        game = self.S
        score_before = game.score1()
        cells_before = game.cells

        game.move(action + 1)

        reward = game.score1() - score_before
        self.score = game.score1()

        # A new tile is only spawned when the move actually altered the board.
        board_unchanged = cells_before == game.cells
        if not board_unchanged:
            game.new_tile()

        done = not game.canMove()
        return game.vectorize_state(), reward, done

    def seed(self, a):
        """Accept a seed value and ignore it; always returns ``None``."""
        return None
# (minimum max-tile value, number of playouts).  Ordered from the largest
# threshold down so that every tier can actually be selected; testing the
# smallest threshold first would shadow all the larger ones.
_ROLLOUTS_BY_MAX_TILE = ((512, 120), (256, 75), (32, 50), (16, 20))
_DEFAULT_ROLLOUTS = 10
_MOVE_CODES = (1, 2, 3, 4)


def _rollout_count(max_value):
    """Return the number of random playouts to run for *max_value*."""
    for threshold, runs in _ROLLOUTS_BY_MAX_TILE:
        if max_value >= threshold:
            return runs
    return _DEFAULT_ROLLOUTS


def monte_play(game_state):
    """Estimate the worth of *game_state* by averaging random playouts.

    A fresh ``TwentyFortyEight`` is seeded with the ``cells`` and ``score``
    of *game_state* and played with uniformly random move codes (1-4) until
    ``canMove()`` is false.  The number of playouts grows with
    ``game_state.maxValue()``: 10 by default, then 20 / 50 / 75 / 120 once
    the value reaches 16 / 32 / 256 / 512.

    Args:
        game_state: Game object providing ``maxValue()``, ``cells`` and
            ``score``.  It is only read, never modified, by this function.

    Returns:
        The mean of ``score1()`` over all playouts, as a float.
    """
    runs = _rollout_count(game_state.maxValue())
    total = 0.0
    for _ in range(runs):
        sim = TwentyFortyEight()
        sim.cells = game_state.cells
        sim.score = game_state.score
        while sim.canMove():
            cells_before = sim.cells
            sim.move(random.choice(_MOVE_CODES))
            # Same rule as Environment.step: only spawn a tile when the move
            # changed the board, so a no-op move never adds a tile.
            if sim.cells != cells_before:
                sim.new_tile()
        total += sim.score1()
    return total / runs