def take_action(self, state):
    # Collect the legal moves: slide() returns -1 when a slide is illegal.
    legal = [op for op in range(4) if board(state).slide(op) != -1]
    if legal:
        op = self.choice(legal)
        return action.slide(op)
    else:
        return action()
def take_action(self, before):
    # Greedy one-ply search: pick the slide maximizing reward + V(afterstate).
    best_v = float('-inf')
    best_a = None
    best_op = None
    for op in range(4):
        after = board(before)
        reward = after.slide(op)
        if reward != -1:
            tmp_v = reward + self.evaluate(after)
            if tmp_v > best_v:
                best_v = tmp_v
                best_a = action.slide(op)
                best_op = op
    # TD(0) backup: update the previous afterstate toward the best value
    # reachable from the current state (0 if no move is legal, i.e. game over).
    if not self.isFirst:
        if best_v != float('-inf'):
            self.update(self.last_state, best_v)
        else:
            self.update(self.last_state, 0)
    if best_op is not None:
        # Remember the chosen afterstate for the next backup.
        self.last_state = board(before)
        self.last_state.slide(best_op)
        self.last_value = self.evaluate(self.last_state)
    self.isFirst = False
    if best_a is None:
        return action()
    else:
        return best_a
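# The evaluate()/update() accessors used above are not shown in this section.
# A common shape for them in n-tuple-network TD(0) learners is sketched below;
# self.features, self.weights, and index_of() are hypothetical names, not the
# source's own API.
def evaluate(self, state):
    # V(state): sum the weight-table entry of every feature pattern.
    return sum(self.weights[i][self.index_of(state, feat)]
               for i, feat in enumerate(self.features))

def update(self, state, target, alpha=0.0025):
    # Move V(state) toward the TD target; the step is split evenly
    # across the feature tables.
    delta = alpha * (target - self.evaluate(state)) / len(self.features)
    for i, feat in enumerate(self.features):
        self.weights[i][self.index_of(state, feat)] += delta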
def take_action(self, state):
    # Greedy on immediate reward only: score each slide on a copy of the board.
    scores = [board(state).slide(op) for op in range(4)]
    max_value = max(scores)
    if max_value != -1:
        # index() returns the first maximum, so ties break toward the lowest opcode.
        max_index = scores.index(max_value)
        return action.slide(max_index)
    else:
        return action()
def take_action(self, state):
    ###
    ### Try your way
    ### Hint: select the action with maximum (reward + expected value)
    ###
    # Placeholder policy: a uniformly random legal move.
    legal = [op for op in range(4) if board(state).slide(op) != -1]
    if legal:
        op = self.choice(legal)
        return action.slide(op)
    else:
        return action()
from operator import itemgetter  # needed for the argmax below

def take_action(self, state, weight):
    # Keep only the moves whose evaluation is defined (i.e., legal slides).
    legal = list(filter(lambda x: x[1] is not None,
                        [(op, weight.evaluate(state, op)) for op in range(4)]))
    if legal:
        # Choose the (op, value) pair with the highest value.
        argmax = max(legal, key=itemgetter(1))
        op = argmax[0]
        state.op = op
        return action.slide(op)
    else:
        return action()
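# ASSUMPTION (not in the source): weight.evaluate(state, op) returns None for
# an illegal slide and a numeric score otherwise, which is the contract the
# take_action above relies on. A minimal sketch consistent with that usage:
class Weight:  # hypothetical wrapper around the learned value function
    def evaluate(self, state, op):
        after = board(state)
        reward = after.slide(op)
        if reward == -1:
            return None                    # illegal: filtered out by the caller
        return reward + self.value(after)  # value(): assumed state-value lookup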
def select_best_action(self, board_state):
    legal_ops = [
        op for op in range(4)
        if board(board_state).slide(op) != -1
    ]
    if legal_ops:
        # Start from the first legal move; initializing best_value to -1
        # could select an illegal op 0 when every evaluation is negative.
        best_op = legal_ops[0]
        best_value = float('-inf')
        for op in legal_ops:
            value = self.evaluate_state_action(board_state, op)
            if value > best_value:
                best_value = value
                best_op = op
        return action.slide(best_op)
    else:
        return action()
def take_action(self, state):
    # Assumes "import numpy as np" at the top of the module.
    expValues = []
    rewards = []
    for op in range(4):
        tmpBoard = board(state)
        # get reward of the afterstate
        rewards.append(tmpBoard.slide(op))
        if rewards[-1] == -1:
            # When the action is not allowed (reward == -1),
            # it is impossible to take the action.
            expValues.append(-float("inf"))
        else:
            expValues.append(rewards[-1] + self.lineValue(tmpBoard))
    if max(rewards) == -1:
        # If every reward is -1, no slide is legal: the game is over.
        return True, action()
    best_move = int(np.argmax(expValues))  # cast: np.argmax returns np.int64
    return False, action.slide(best_move)
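# numpy is only needed for the argmax above; an equivalent pure-Python
# selection (a sketch, not from the source) avoids the dependency entirely:
best_move = max(range(4), key=lambda op: expValues[op])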
def take_action(self, state):
    # Initialize to -inf so a learned value that happens to be negative can
    # still win; the original -1 sentinel would have rejected such moves.
    max_value = float('-inf')
    max_op = -1
    for op in range(4):
        new_board = board(state)
        reward = new_board.slide(op)
        if reward == -1:
            continue
        # Expected return = immediate reward + value of the afterstate.
        expect = reward + self.get_value(new_board)
        if expect > max_value:
            max_value = expect
            max_op = op
    if max_op == -1:
        return action()
    else:
        return action.slide(max_op)
    return

def initial_state(self):
    return board()

def millisec(self):
    return int(round(time.time() * 1000))

if __name__ == '__main__':
    # Assumes "import io" and "import time" at the top of episode.py.
    print('2048 Demo: episode.py\n')

    # (action, reward, time usage) triples
    moves = []
    moves += [(action.place(0, 1), 0, 1)]
    moves += [(action.place(1, 1), 0, 1)]
    moves += [(action.slide(3), 2, 1)]
    for mv in moves:
        print(str(mv[0]) + str(mv[1]) + str(mv[2]))

    # Serialize each move as action[reward](time), omitting zero fields.
    print("".join([str(move[0])
                   + ("[" + str(move[1]) + "]" if move[1] else "")
                   + ("(" + str(move[2]) + ")" if move[2] else "")
                   for move in moves]))

    # StringIO.read(1) yields one character at a time, then "" at EOF.
    sio = io.StringIO("0123")
    print(sio.read(1))
    print(sio.read(1))
    print(sio.read(1))
    print(sio.read(1))
    print(sio.read(1) == "")

    line = "".join([str(move[0])
                    + ("[" + str(move[1]) + "]" if move[1] else "")
                    + ("(" + str(move[2]) + ")" if move[2] else "")
                    for move in moves])
    print(line)
    minput = io.StringIO(line)
    state = board()
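# Sketch (not from the source): reading the optional "[reward]" and "(time)"
# fields back out of `line`, using tell()/seek() to peek one character since
# StringIO has no unread. The action token itself is framework-specific and
# is deliberately not parsed here.
def read_optional_int(stream, open_ch, close_ch):
    pos = stream.tell()
    if stream.read(1) != open_ch:
        stream.seek(pos)   # not this field: rewind and report the default
        return 0
    digits = ""
    ch = stream.read(1)
    while ch and ch != close_ch:
        digits += ch
        ch = stream.read(1)
    return int(digits)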
def evaluate_state_action(self, board_state, op):
    # Score a slide as the immediate reward plus the learned value of the
    # resulting afterstate.
    move = action.slide(op)
    board_after_state, reward = self.compute_after_state(board_state, move)
    return reward + self.lineValue(board_after_state)
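# compute_after_state is not shown in the source. If action objects expose
# apply(board) returning the move's reward (-1 when illegal), as in the
# original 2048 framework, the helper could be a sketch as simple as:
def compute_after_state(self, board_state, move):
    after = board(board_state)   # work on a copy; leave the caller's board intact
    reward = move.apply(after)   # mutate the copy into the afterstate
    return after, reward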