Example #1
def value_move(board, active_turn, output_fun, exploration):
    """Pick a move for `active_turn` using a value function.

    `output_fun` maps a (1, 9) board array (with the player being evaluated
    as +1) to a scalar value; `exploration` is the epsilon for epsilon-greedy
    play. Returns a 1-indexed move.
    """
    board = board.reshape((1, 9))

    player_dict = {'X': 1, 'O': -1}

    # Re-express the board so that the active player's marks are +1.
    dummy_board = player_dict[active_turn] * board[:]
    options = ttt.available_moves(dummy_board)

    if exploration > random.random():
        # Explore: play a random legal move.
        move = random.choice(options)
    else:
        # Exploit: evaluate each legal move and pick the best one.
        move_values = np.zeros(9)
        for move in options:
            dummy_board = player_dict[active_turn] * board[:]
            dummy_board[0][move] = 1
            # Value of the resulting position from the opponent's perspective,
            # negated so that higher is better for the active player.
            move_values[move] = -1 * output_fun(-1 * dummy_board)

        available_move_values = np.array([move_values[move] for move in options])
        move = options[available_move_values.argmax(-1)]
    return move + 1  # moves are 1-indexed
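
`value_move` only assumes that `output_fun` maps a (1, 9) board array to a scalar value (in the original it is a trained Theano network). A hypothetical stand-in for smoke-testing, not the author's model:

import numpy as np

def dummy_value_fun(board_1x9):
    # Hypothetical stand-in for output_fun: any callable mapping a (1, 9)
    # board array to a scalar value works; this one mildly prefers the centre
    # and corners for the +1 player.
    weights = np.array([0.1, 0.0, 0.1, 0.0, 0.3, 0.0, 0.1, 0.0, 0.1])
    return float(np.dot(board_1x9.reshape(9), weights))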
Example #2
def alpha_beta_move(board, active_turn, depth, alpha=2):
    """Negamax search with a simple alpha cutoff.

    Returns (value, move), where value is from the perspective of
    `active_turn` and the move is 1-indexed.
    """
    swap_dict = {'X': 'O', 'O': 'X'}
    dummy_board = np.arange(9)
    dummy_board[:] = board[:]  # integer working copy of the board
    options = ttt.available_moves(board)
    random.shuffle(options)
    player_dict = {'X': 1, 'O': -1}

    # Only one move left: play it and report whether it wins.
    if len(options) == 1:
        dummy_board[options[0]] = player_dict[active_turn]
        if ttt.winner(dummy_board):
            return (1, options[0] + 1)
        else:
            return (0, options[0] + 1)

    # Depth exhausted: return a neutral value and a random legal move.
    if depth == 0:
        return (0, options[np.random.randint(len(options))] + 1)

    best_value = -2
    candidate_move = None
    for x in options:
        dummy_board[x] = player_dict[active_turn]
        if ttt.winner(dummy_board):
            return (1, x + 1)
        # Opponent's best reply; our score is its negation (negamax).
        (opp_value, opp_move) = alpha_beta_move(dummy_board, swap_dict[active_turn],
                                                depth - 1, -best_value)
        if -opp_value > best_value:
            candidate_move = x + 1
            best_value = -opp_value
        if -opp_value >= alpha:
            # The opponent will never allow this line: prune.
            break
        dummy_board[x] = board[x]  # undo the trial move

    return (best_value, candidate_move)
Example #3
def monte_carlo_sample(board_state, side):
    """Sample a single rollout from the current board_state and
     side. Moves are made to the current board_state until we reach a
     terminal state then the result and the first move made to get
     there is returned.

    Args:

    board_state (3x3 tuple of int): state of the board

    side (int): side currently to play. +1 for the plus player, -1 for
        the minus player

    Returns: (result(int), move(int,int)): The result from this
        rollout, +1 for a win for the plus player -1 for a win for the
        minus player, 0 for a draw

    """
    result = has_winner(board_state)
    if result != 0:
        return result, None
    moves = list(available_moves(board_state))
    if not moves:
        return 0, None

    # select a random move
    move = random.choice(moves)
    result, next_move = monte_carlo_sample(apply_move(board_state, move, side),
                                           -side)
    return result, move
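
Examples #3, #5, #7 and #12 call `available_moves`, `apply_move` and `has_winner` without defining them. Below is a minimal sketch consistent with the docstrings (boards as 3x3 tuples of int, moves as (row, column), results in {+1, -1, 0}); the original project's implementations may differ:

import itertools

def available_moves(board_state):
    # Yield (row, column) pairs of empty squares; assumes 0 marks an empty square.
    for row, column in itertools.product(range(3), range(3)):
        if board_state[row][column] == 0:
            yield row, column

def apply_move(board_state, move, side):
    # Return a new 3x3 tuple board with `side` (+1 or -1) placed at `move`.
    row, column = move
    new_row = board_state[row][:column] + (side,) + board_state[row][column + 1:]
    return board_state[:row] + (new_row,) + board_state[row + 1:]

def has_winner(board_state):
    # Return +1 or -1 if that side has three in a row, otherwise 0.
    lines = [tuple(board_state[r][c] for c in range(3)) for r in range(3)]    # rows
    lines += [tuple(board_state[r][c] for r in range(3)) for c in range(3)]   # columns
    lines += [tuple(board_state[i][i] for i in range(3)),                     # diagonals
              tuple(board_state[i][2 - i] for i in range(3))]
    for line in lines:
        if line == (1, 1, 1):
            return 1
        if line == (-1, -1, -1):
            return -1
    return 0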
Example #4
def alpha_beta_move(board, turn, depth=0, alpha=(-inf, -inf), beta=(inf, inf), evaluation=lambda x: 0):
    """Negamax alpha-beta search over (score, depth) value tuples."""
    dummy_board = np.copy(board).reshape(9)  # we don't want to change the board state

    swap_player = {1: -1, -1: 1}  # so we can change whose turn it is
    options = ttt.available_moves(board)  # get legal moves
    random.shuffle(options)  # should inherit move order instead of randomizing

    best_value = (-inf, -inf)

    if not options:
        print(board, cccc.game_over(board))
        print('oops, no available moves')
    cand_move = options[0]

    if depth == 0:
        # Leaf: score each move with the static evaluation function.
        for x in options:
            update_move(dummy_board, x, turn)
            op_value = (evaluation(dummy_board * swap_player[turn]), depth)

            if tuple(-1 * el for el in op_value) > best_value:
                cand_move = x
                best_value = tuple(-1 * el for el in op_value)
                alpha = max(alpha, best_value)
            if alpha >= beta:
                break  # alpha-beta cutoff
            unupdate_move(dummy_board, x)
    else:
        for x in options:
            update_move(dummy_board, x, turn)

            if ttt.winner(dummy_board):  # should check over and tied too
                return ((inf, depth), x)

            if ttt.is_full(dummy_board):  # this assumes you can't lose on your turn
                return ((0, depth), x)

            # Recurse for the opponent with the search window negated and swapped.
            op_value, _ = alpha_beta_move(dummy_board,
                                          swap_player[turn],
                                          depth - 1,
                                          alpha=tuple(-1 * el for el in beta),
                                          beta=tuple(-1 * el for el in alpha),
                                          evaluation=evaluation)

            if tuple(-1 * el for el in op_value) > best_value:
                cand_move = x
                best_value = tuple(-1 * el for el in op_value)
                alpha = max(alpha, best_value)
            if alpha >= beta:
                break  # alpha-beta cutoff
            unupdate_move(dummy_board, x)  # undo the trial move
    return (best_value, cand_move)
Example #5
def min_max_alpha_beta(board_state,
                       side,
                       max_depth,
                       evaluation_func=evaluate,
                       alpha=-sys.float_info.max,
                       beta=sys.float_info.max):
    """Runs the min_max_algorithm on a given board_sate for a given side, to a given depth in order to find the best
    move

    Args:
        board_state (3x3 tuple of int): The board state we are evaluating
        side (int): either +1 or -1
        max_depth (int): how deep we want our tree to go before we use the evaluate method to determine how good the
        position is.
        evaluation_func (board_state -> int): Function used to evaluate the position for the plus player
        alpha (float): Used when this is called recursively, normally ignore
        beta (float): Used when this is called recursively, normally ignore

    Returns:
        (best_score(int), best_score_move((int, int)): the move found to be best and what it's min-max score was
    """
    best_score_move = None
    moves = list(available_moves(board_state))
    if not moves:
        return 0, None

    for move in moves:
        new_board_state = apply_move(board_state, move, side)
        winner = has_winner(new_board_state)
        if winner != 0:
            return winner * 10000, move
        else:
            if max_depth <= 1:
                score = evaluation_func(new_board_state)
            else:
                score, _ = min_max_alpha_beta(new_board_state, -side, max_depth - 1,
                                              evaluation_func, alpha, beta)

        if side > 0:
            if score > alpha:
                alpha = score
                best_score_move = move
        else:
            if score < beta:
                beta = score
                best_score_move = move
        if alpha >= beta:
            break

    return alpha if side > 0 else beta, best_score_move
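
The default `evaluation_func=evaluate` above refers to a heuristic that is not shown in this listing. A hypothetical stand-in (counting the lines still open for each side), followed by a call that reuses the helper sketch given after Example #3; neither is taken from the source:

def evaluate(board_state):
    # Hypothetical heuristic: +1 for every line the plus player can still
    # complete, -1 for every line the minus player can still complete.
    lines = [[board_state[r][c] for c in range(3)] for r in range(3)]    # rows
    lines += [[board_state[r][c] for r in range(3)] for c in range(3)]   # columns
    lines += [[board_state[i][i] for i in range(3)],                     # diagonals
              [board_state[i][2 - i] for i in range(3)]]
    score = 0
    for line in lines:
        if -1 not in line:
            score += 1
        if 1 not in line:
            score -= 1
    return score

# Example call (assumes min_max_alpha_beta and the Example #3 helpers are in scope):
empty_board = ((0, 0, 0), (0, 0, 0), (0, 0, 0))
best_score, best_move = min_max_alpha_beta(empty_board, side=1, max_depth=4,
                                           evaluation_func=evaluate)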
Example #6
def get_max_future(future_board, value_fun):
    """Score every legal reply by the -1 player and return the best one.

    value_fun scores boards from the +1 player's perspective, so the minimum
    value is best for the -1 player; returns that value and the resulting board.
    """
    options = ttt.available_moves(future_board)
    move_values = np.zeros(9)
    for move in options:
        dummy_board = np.copy(future_board)
        dummy_board[move] = -1
        dummy_board = dummy_board.reshape(1, 9)
        if ttt.winner(dummy_board):
            # Terminal position: use the game result directly.
            move_values[move] = ttt.winner(dummy_board)
        else:
            move_values[move] = value_fun(dummy_board)

    available_move_values = np.array([move_values[move] for move in options])
    dummy_board = np.copy(future_board)
    options_index = np.argmin(available_move_values)
    dummy_board[options[options_index]] = -1
    return np.amin(available_move_values), dummy_board
Example #7
def min_max(board_state, side, max_depth, evaluation_func=evaluate):
    """Runs the min_max_algorithm on a given board_sate for a given side, to a given depth in order to find the best
    move

    Args:
        board_state (3x3 tuple of int): The board state we are evaluating
        side (int): either +1 or -1
        max_depth (int): how deep we want our tree to go before we use the evaluate method to determine how good the
        position is.
        evaluation_func (board_state -> int): Function used to evaluate the position for the plus player

    Returns:
        (best_score(int), best_score_move((int, int)): the move found to be best and what it's min-max score was
    """
    best_score = None
    best_score_move = None

    moves = list(available_moves(board_state))
    if not moves:
        # this is a draw
        return 0, None

    for move in moves:
        new_board_state = apply_move(board_state, move, side)
        winner = has_winner(new_board_state)
        if winner != 0:
            return winner * 10000, move
        else:
            if max_depth <= 1:
                score = evaluation_func(new_board_state)
            else:
                score, _ = min_max(new_board_state, -side, max_depth - 1, evaluation_func)
            if side > 0:
                if best_score is None or score > best_score:
                    best_score = score
                    best_score_move = move
            else:
                if best_score is None or score < best_score:
                    best_score = score
                    best_score_move = move
    return best_score, best_score_move
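
A hypothetical call, reusing the tuple-based helpers sketched after Example #3; the depth and the trivial evaluation function are arbitrary choices for illustration:

# Assumes min_max and the Example #3 helper sketch are defined in the same module.
empty_board = ((0, 0, 0),
               (0, 0, 0),
               (0, 0, 0))
score, move = min_max(empty_board, side=1, max_depth=9, evaluation_func=lambda b: 0)
print(score, move)  # searched to full depth, perfect play is a draw: score 0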
Example #8
def policy_move(board, active_turn, output_fun, exploration):
    """Pick a move for `active_turn` using a policy function.

    `output_fun` maps a (1, 9) board array (with the active player as +1) to
    a length-9 vector of move weights; `exploration` is the epsilon for
    epsilon-greedy play. Returns a 1-indexed move.
    """
    board = board.reshape((1, 9))

    player_dict = {'X': 1, 'O': -1}
    dummy_board = player_dict[active_turn] * board[:]  # make 1s good and -1s bad

    move_weights = output_fun(dummy_board)
    move_weights = move_weights.reshape(9)

    options = ttt.available_moves(dummy_board)

    if exploration > random.random():
        # Explore: play a random legal move.
        move = random.choice(options)
    else:
        # Exploit: pick the legal move with the highest policy weight.
        available_move_weights = np.array([move_weights[i] for i in options])
        move = options[available_move_weights.argmax(-1)]

    return move + 1  # moves are 1-indexed
Example #9
def monte_carlo(board, epsilon=0.5, duration=1, player=1):
    """Estimate the value of `board` for `player` by running playouts for
    `duration` seconds and averaging the results."""
    plays = {}
    results = {}
    t0 = time.perf_counter()  # time.clock() was removed in Python 3.8
    plays[tuple(board)] = 0
    results[tuple(board)] = 0

    while time.perf_counter() - t0 < duration:
        current_player = player
        dummy_board = np.copy(board)
        branch = [(np.copy(dummy_board), current_player)]

        while not game_over(dummy_board):
            options = ttt.available_moves(dummy_board)
            future_boards = [next_board(dummy_board, move, current_player) for move in options]

            if all(plays.get(tuple(b)) for b in future_boards):
                if random.random() > epsilon:
                    dummy_board = random.choice(future_boards)
                else:
                    # min here: results for future boards are stored from the
                    # perspective of the player to move on them (the opponent),
                    # so minimizing their average result maximizes ours.
                    dummy_board = min(future_boards,
                                      key=lambda x: results[tuple(x)] / float(plays[tuple(x)]))
            else:
                dummy_board = random.choice(future_boards)
                plays[tuple(dummy_board)] = 0
                results[tuple(dummy_board)] = 0
            current_player *= -1
            branch.append((np.copy(dummy_board), current_player))

        # Back up the playout result along the visited branch.
        for b, p in branch:
            plays[tuple(b)] += 1
            results[tuple(b)] += p * ttt.winner(dummy_board)

    return results[tuple(board)] / float(plays[tuple(board)])
Example #10
def mc_step(branch, results, epsilon, cutoff=10000):
    dummy_board = np.copy(branch[-1])

    # To help convergence we could randomly drop stored values:
    # if random.random() < 1/float(cutoff):
    #     results[tuple(dummy_board)] = {'result': 0, 'plays': 0}

    if not results.get(tuple(dummy_board)):
        results[tuple(dummy_board)] = {'result': 0, 'plays': 0}

    board_plays = results[tuple(dummy_board)]['plays']
    board_result = results[tuple(dummy_board)]['result']

    if game_over(dummy_board):
        result = ttt.winner(dummy_board)

    elif board_plays > cutoff:
        # Enough samples for this position: treat the stored average as the result.
        result = board_result / float(board_plays)

    else:
        options = ttt.available_moves(dummy_board)
        future_boards = [next_board(dummy_board, move, 1) for move in options]
        if all(results.get(tuple(-1 * b)) for b in future_boards):
            if epsilon(board_plays) > random.random():
                dummy_board = random.choice(future_boards)
            else:
                # Future boards are stored flipped (hence the -1), i.e. from the
                # opponent's perspective, so our best move minimizes their average.
                dummy_board = min(future_boards, key=lambda x:
                                  results[tuple(-1 * x)]['result'] / float(results[tuple(-1 * x)]['plays']))
        else:
            dummy_board = random.choice(future_boards)

        branch.append(-1 * np.copy(dummy_board))
        result, _ = mc_step(branch, results, epsilon, cutoff)
        result = -1 * result

    return result, branch
Example #11
def random_move(board, turn):
    options = ttt.available_moves(board)
    move = random.choice(options)
    dummy_board = np.copy(board)
    dummy_board[move] = turn
    return dummy_board  # returns the new board rather than the move itself
Example #12
def monte_carlo_tree_search_uct(board_state, side, number_of_samples):
    """Evaluate the best from the current board_state for the given side using monte carlo sampling with upper
    confidence bounds for trees.

    Args:
        board_state (3x3 tuple of int): state of the board
        side (int): side currently to play. +1 for the plus player, -1 for the minus player
        number_of_samples (int): number of sample rollouts to run from the current position; the higher the number,
            the better the estimate of the position

    Returns:
        (result(int), move(int,int)): The average result for the best move from this position and what that move was.
    """
    state_results = collections.defaultdict(float)
    state_samples = collections.defaultdict(float)

    for _ in range(number_of_samples):
        current_side = side
        current_board_state = board_state
        first_unvisited_node = True
        rollout_path = []
        result = 0

        while result == 0:
            move_states = {
                move: apply_move(current_board_state, move, current_side)
                for move in available_moves(current_board_state)
            }

            if not move_states:
                result = 0
                break

            if all((state in state_samples) for state in move_states.values()):
                log_total_samples = math.log(
                    sum(state_samples[s] for s in move_states.values()))
                # Pick the move whose resulting state has the best upper confidence bound.
                move, state = max(
                    move_states.items(),
                    key=lambda move_state: _upper_confidence_bounds(
                        state_results[move_state[1]],
                        state_samples[move_state[1]],
                        log_total_samples))
            else:
                move = random.choice(list(move_states.keys()))

            current_board_state = move_states[move]

            if first_unvisited_node:
                rollout_path.append((current_board_state, current_side))
                if current_board_state not in state_samples:
                    first_unvisited_node = False

            current_side = -current_side

            result = has_winner(current_board_state)

        for path_board_state, path_side in rollout_path:
            state_samples[path_board_state] += 1.
            result *= path_side
            # normalize the result to be between 0 and 1; before this it is between -1 and 1
            result /= 2.
            result += .5
            state_results[path_board_state] += result

    move_states = {
        move: apply_move(board_state, move, side)
        for move in available_moves(board_state)
    }

    move = max(move_states,
               key=lambda x: state_results[move_states[x]] / state_samples[
                   move_states[x]])

    return state_results[move_states[move]] / state_samples[
        move_states[move]], move
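
The `_upper_confidence_bounds` helper is not shown in this listing. A plausible sketch following the standard UCB1 formula (the exploration constant is an assumption, not taken from the source):

import math

def _upper_confidence_bounds(payout, samples_for_this_state, log_total_samples):
    # UCB1: average payout plus an exploration bonus that shrinks as this
    # state accumulates samples; sqrt(2) is a common default constant.
    return (payout / samples_for_this_state
            + math.sqrt(2.0 * log_total_samples / samples_for_this_state))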