Exemplo n.º 1
0
 def take_action(self, state):
     """Return a slide picked by ``self.choice`` among the legal directions.

     A direction ``op`` in ``0..3`` counts as legal when sliding a fresh
     ``board(state)`` does not return ``-1``. When no direction is legal,
     a default-constructed ``action()`` is returned instead.
     """
     legal = []
     for op in range(4):
         # Probe each direction on a fresh board built from ``state``
         # (presumably a copy, so ``state`` itself stays untouched).
         if board(state).slide(op) != -1:
             legal.append(op)
     if not legal:
         return action()
     return action.slide(self.choice(legal))
Exemplo n.º 2
0
    def take_action(self, before):
        """Choose the slide maximising ``reward + self.evaluate(afterstate)``.

        Besides selecting a move, this does the bookkeeping visible in this
        method: on every call except the first, ``self.update`` is invoked
        for the previously stored ``self.last_state`` with the best value
        found now (or ``0`` when no slide is legal). Afterwards
        ``self.last_state`` / ``self.last_value`` are refreshed from the
        chosen move and ``self.isFirst`` is cleared.

        Args:
            before: the board state before this move.

        Returns:
            ``action.slide(op)`` for the best legal direction, or a
            default-constructed ``action()`` when no direction is legal.
        """
        best_v = float('-inf')
        best_a = None
        best_op = None
        for op in range(4):
            after = board(before)
            reward = after.slide(op)
            if reward == -1:
                # -1 marks a direction that cannot be played.
                continue
            tmp_v = reward + self.evaluate(after)
            if tmp_v > best_v:
                best_v = tmp_v
                best_a = action.slide(op)
                best_op = op

        if not self.isFirst:
            # With no legal move the previous state is updated towards 0.
            target = best_v if best_op is not None else 0
            self.update(self.last_state, target)

        self.last_state = board(before)
        if best_op is not None:
            # Only replay the move when one was actually selected; the
            # original passed ``None`` to ``slide`` in the no-move case.
            self.last_state.slide(best_op)
        self.last_value = self.evaluate(self.last_state)
        self.isFirst = False

        if best_a is None:
            return action()
        return best_a
Exemplo n.º 3
0
 def take_action(self, state):
     """Greedily pick the direction with the highest immediate slide reward.

     Each direction ``0..3`` is tried on a fresh ``board(state)``. The first
     direction reaching the maximum reward wins. If even the best reward
     is ``-1`` (no direction is playable), ``action()`` is returned.
     """
     rewards = []
     for op in range(4):
         rewards.append(board(state).slide(op))
     # ``max`` returns the first maximal index, matching ``list.index``.
     best_op = max(range(4), key=rewards.__getitem__)
     if rewards[best_op] == -1:
         return action()
     return action.slide(best_op)
Exemplo n.º 4
0
 def take_action(self, state):
     """Return a slide chosen by ``self.choice`` from the legal directions.

     This is the placeholder policy of the template: the accompanying hint
     suggests replacing it with "select the action with maximum
     (reward + expected value)". Returns ``action()`` when nothing is legal.
     """

     def is_legal(op):
         # A slide on a fresh board returning -1 means the move is rejected.
         return board(state).slide(op) != -1

     legal = [op for op in range(4) if is_legal(op)]
     if not legal:
         return action()
     picked = self.choice(legal)
     return action.slide(picked)
Exemplo n.º 5
0
 def take_action(self, state, weight):
     """Pick the direction with the highest ``weight.evaluate(state, op)``.

     Directions whose evaluation is ``None`` are discarded (this method
     treats them as not playable). Ties go to the lowest direction index.

     Args:
         state: current state; its ``op`` attribute is set to the chosen
             direction as a side effect when a move is selected.
         weight: object providing ``evaluate(state, op)``.

     Returns:
         ``action.slide(op)`` for the best direction, or ``action()`` when
         every evaluation is ``None``.
     """
     candidates = [(op, weight.evaluate(state, op)) for op in range(4)]
     # Identity test: ``!= None`` would defer to the value's own ``__ne__``.
     legal = [pair for pair in candidates if pair[1] is not None]
     if not legal:
         return action()
     op, _ = max(legal, key=itemgetter(1))
     state.op = op
     return action.slide(op)
Exemplo n.º 6
0
 def select_best_action(self, board_state):
     """Return the legal slide with the highest state-action value.

     A direction ``0..3`` is legal when sliding a fresh
     ``board(board_state)`` does not return ``-1``. Legal directions are
     scored with ``self.evaluate_state_action``; ties go to the lowest
     direction index.

     Returns:
         ``action.slide(best_op)``, or ``action()`` when no direction is
         legal.
     """
     legal_ops = [
         op for op in range(4) if board(board_state).slide(op) != -1
     ]
     if not legal_ops:
         return action()
     # Take the maximum over the legal ops only. The previous -1 / op 0
     # sentinels could return direction 0 even when it was illegal, if
     # every legal direction scored <= -1.
     best_op = max(
         legal_ops,
         key=lambda op: self.evaluate_state_action(board_state, op),
     )
     return action.slide(best_op)
Exemplo n.º 7
0
 def take_action(self, state):
     """Select the slide maximising ``reward + self.lineValue(afterstate)``.

     Returns:
         A ``(game_over, action)`` pair: ``(True, action())`` when every
         direction yields reward ``-1``, otherwise
         ``(False, action.slide(best))`` where ``best`` is the arg-max of
         the expected values.
     """
     slide_rewards = []
     expected = []
     for op in range(4):
         after = board(state)
         # Reward of the after-state; -1 means the move is not allowed.
         reward = after.slide(op)
         slide_rewards.append(reward)
         if reward != -1:
             expected.append(reward + self.lineValue(after))
         else:
             # Disallowed moves can never win the arg-max below.
             expected.append(-float("inf"))
     # All rewards equal to -1 means no move is possible: game over.
     if max(slide_rewards) == -1:
         return True, action()
     return False, action.slide(np.argmax(expected))
Exemplo n.º 8
0
 def take_action(self, state):
     """Choose the slide maximising ``self.get_value(afterstate) + reward``.

     Each direction ``0..3`` is tried on a fresh ``board(state)``;
     directions whose slide returns ``-1`` are skipped. Ties go to the
     lowest direction index.

     Returns:
         ``action.slide(op)`` for the best playable direction, or
         ``action()`` when no direction is playable.
     """
     best_value = float('-inf')
     best_op = None
     for op in range(4):
         after = board(state)
         reward = after.slide(op)
         if reward == -1:
             continue
         expect = self.get_value(after) + reward
         # Accept the first playable move unconditionally. The previous -1
         # sentinel dropped every move whose estimate was <= -1 and then
         # returned the no-move action despite playable moves existing.
         if best_op is None or expect > best_value:
             best_value = expect
             best_op = op
     if best_op is None:
         return action()
     return action.slide(best_op)
Exemplo n.º 9
0
        return
    
    def initial_state(self):
        """Return a newly constructed, default ``board()``."""
        fresh = board()
        return fresh
    
    def millisec(self):
        """Return the current ``time.time()`` in milliseconds, rounded to int."""
        now_ms = time.time() * 1000
        return int(round(now_ms))
        
    
if __name__ == '__main__':
    # Manual demo: builds a few moves, prints them in two textual forms and
    # exercises character-wise reading from an in-memory text stream.
    print('2048 Demo: episode.py\n')
    # Each entry is a tuple of (action, reward, time usage).
    moves = []
    moves += [(action.place(0,1), 0, 1)]
    moves += [(action.place(1,1), 0, 1)]
    moves += [(action.slide(3), 2, 1)]
    # Plain form: the three fields of every move concatenated as strings.
    for mv in moves:
        print(str(mv[0]) + str(mv[1]) + str(mv[2]))
    # Compact form: "[reward]" and "(time)" are appended only when the
    # respective field is truthy, i.e. non-zero.
    print("".join([str(move[0]) + ("[" + str(move[1]) + "]" if move[1] else "") + ("(" + str(move[2]) + ")" if move[2] else "") for move in moves]))
    
    # Read a StringIO one character at a time; once the stream is exhausted
    # read(1) returns the empty string, so the last line prints True.
    sio = io.StringIO("0123")
    print(sio.read(1))
    print(sio.read(1))
    print(sio.read(1))
    print(sio.read(1))
    print(sio.read(1) == "")
    
    # Build the same compact line again and wrap it in an in-memory stream.
    line = "".join([str(move[0]) + ("[" + str(move[1]) + "]" if move[1] else "") + ("(" + str(move[2]) + ")" if move[2] else "") for move in moves])
    print(line)
    minput = io.StringIO(line)
    state = board()
Exemplo n.º 10
0
 def evaluate_state_action(self, board_state, op):
     """Score sliding ``board_state`` in direction ``op``.

     Returns the reward reported by ``self.compute_after_state`` for the
     slide plus ``self.lineValue`` of the resulting after-state.
     """
     after_state, reward = self.compute_after_state(
         board_state, action.slide(op)
     )
     estimate = self.lineValue(after_state)
     return reward + estimate