Пример #1
0
def update_value_map_random(value_map, player, move):
    """Assign value 0 to `move` for `player`, then recurse over all replies.

    Models a purely random player: every position, terminal or not, is
    worth 0.  The recursion only serves to populate `value_map` with an
    entry for every reachable (player, state) pair.
    """
    value_map[player, move] = 0
    if is_winner(player, move):
        return
    opponent = 3 - player
    for reply in next_states(opponent, move):
        # Skip positions already evaluated (memoization via value_map).
        if (opponent, reply) in value_map:
            continue
        update_value_map_random(value_map, opponent, reply)
Пример #2
0
def player_move_good(player, state):
    """Choose a move by a simple priority rule.

    1. Take an immediate win when one exists.
    2. Otherwise block any cell where the opponent could win next turn.
    3. Otherwise fall back to the first available move.

    NOTE(review): the blocking step zips the two players' move lists,
    which assumes next_states enumerates the empty cells in the same
    order for both players — confirm against next_states.
    """
    opponent = 3 - player
    my_moves = next_states(player, state)

    # 1. Immediate win.
    winners = [m for m in my_moves if is_winner(player, m)]
    if winners:
        return random.choice(winners)

    # 2. Block an opponent win: for each empty cell, if the opponent
    #    playing there wins, play there ourselves instead.
    opp_moves = next_states(opponent, state)
    blockers = [
        mine
        for mine, theirs in zip(my_moves, opp_moves)
        if is_winner(opponent, theirs)
    ]
    if blockers:
        return random.choice(blockers)

    # 3. Nothing urgent: take the first legal move.
    return my_moves[0]
Пример #3
0
def update_value_map_never_lose(value_map, player, move):
    """Minimax value of `move` for `player`, memoized in `value_map`.

    Values: +1 for a win, 0 for a full board with no winner (draw),
    otherwise the negation of the opponent's best reply value.
    """
    opponent = 3 - player
    replies = next_states(opponent, move)

    if is_winner(player, move):
        value_map[player, move] = 1
        return
    if not replies:
        # Board is full and nobody has won: a draw.
        value_map[player, move] = 0
        return

    # Evaluate every opponent reply first (skipping memoized ones).
    for reply in replies:
        if (opponent, reply) not in value_map:
            update_value_map_never_lose(value_map, opponent, reply)

    # The opponent plays to maximize their own value; their gain is
    # our loss, so our value is the negation of their best reply.
    best_for_opponent = max(value_map[opponent, r] for r in replies)
    value_map[player, move] = -best_for_opponent
Пример #4
0
def player_move_learner(player, state, value_map, verbose=True, explore=0.1):
    """Epsilon-greedy move selection from a learned value map.

    With probability `explore` a uniformly random legal move is played;
    otherwise the move with the highest estimated value is chosen
    (unseen moves default to 0.5).

    NOTE(review): `value_map` is keyed by the move/state alone here,
    while the update_value_map_* helpers key by (player, state) —
    confirm which key shape the map passed in actually uses.
    """
    # All legal next moves and their estimated win probabilities.
    next_moves = next_states(player, state)
    next_moves_values = [value_map.get(m, 0.5) for m in next_moves]

    # Epsilon-greedy exploration branch.
    if random.random() < explore:
        if verbose:
            # Fixed typo in the original message ("expoloratory").
            print("\033[1;32;40m exploratory move \n")
        return random.choice(next_moves)

    # Exploit: first move achieving the maximum value.  The maximum is
    # computed once (the original recomputed it for every element).
    best_value = max(next_moves_values)
    return next_moves[next_moves_values.index(best_value)]
Пример #5
0
def update_value_map_good(value_map, player, move):
    """Heuristic value of `move` for `player`, memoized in `value_map`.

    Values: +1 for a win, 0 for a draw, -1 if the opponent can win on
    their next move, otherwise a small positional score preferring the
    center, then corners, then edges.  Recurses so the map eventually
    covers every reachable position.
    """
    opponent = 3 - player
    replies = next_states(opponent, move)

    if is_winner(player, move):
        value_map[player, move] = 1
    elif not replies:
        # Full board, no winner: draw.
        value_map[player, move] = 0
    else:
        if any(is_winner(opponent, r) for r in replies):
            # The opponent wins next turn: worst possible score.
            value_map[player, move] = -1
        else:
            # Positional weights: center (.1) > corners (.01) > edges (.001).
            weights = (.01, .001, .01, .001, .1, .001, .01, .001, .01)
            score = 0
            for cell, weight in zip(move, weights):
                if cell == player:
                    score += weight
            value_map[player, move] = score
        for reply in replies:
            if (opponent, reply) not in value_map:
                update_value_map_good(value_map, opponent, reply)
Пример #6
0
def get_value_map(update_func):
    """Build and return a value map by applying `update_func` to every
    opening move from the empty board (player 1 moves first)."""
    value_map = {}
    empty_board = (0,) * 9
    for opening in next_states(1, empty_board):
        update_func(value_map, 1, opening)
    return value_map
Пример #7
0
def player_move_random(player, state):
    """Return a uniformly random legal move for `player` from `state`."""
    moves = next_states(player, state)
    return random.choice(moves)