Пример #1
0
def generate_session(policy, opponent, cuda=False, t_max=100):
    """
    Play one game against ``opponent`` until it ends or for t_max rounds.

    :param policy: model handed to ``select_action`` to pick our moves; it is
        also passed through to ``opponent`` on its turn.
    :param opponent: callable ``opponent(policy, board)`` that returns the
        board after the other player's move.
    :param cuda: forwarded to ``select_action``.
    :param t_max: maximum number of rounds (our move followed by the
        opponent's move) to play.
    :returns: list of states (the board before each of our moves), list of
        actions (as returned by ``select_action``) and the final reward.
        The reward stays 0. if the game does not finish within t_max rounds
        or an "Invalid result" is reported.
    """
    states, actions = [], []
    total_reward = 0.

    b = Board()

    # Decide if we are player 1 or 2
    # player = np.random.choice((Board.PLAYER_1, Board.PLAYER_2), 1)
    player = Board.PLAYER_1

    if player == Board.PLAYER_2:
        # We are player two, so the opponent (player one) opens the game.
        # This move is not ours and is therefore not recorded in
        # states/actions.
        b = opponent(policy, b)

    for _ in range(t_max):
        # We move
        states.append(b)
        a = select_action(policy, b, cuda)
        actions.append(a)
        b = b.insert(a.data[0][0])

        winner = b.winner()
        if winner:
            if winner == player:
                total_reward = REWARD_WIN
            elif winner == '-':
                total_reward = REWARD_UNDECIDED
            else:
                # The opponent cannot win as a result of our own move.
                print("Invalid result")
            break

        # Other player moves
        b = opponent(policy, b)

        winner = b.winner()
        if winner:
            if winner == '-':
                total_reward = REWARD_UNDECIDED
            elif winner != player:
                total_reward = REWARD_LOOSE
            else:
                # We cannot win as a result of the opponent's move.
                print("Invalid result")
            break

    return states, actions, total_reward
Пример #2
0
def select_action(policy, board: Board, cuda=False, noise=0):
    """
    Sample an action for ``board`` from the policy's output.

    The policy output is multiplied by a 0/1 mask so that only entries for
    actions in ``board.valid_actions()`` remain. If some valid action makes
    the player whose turn it is the winner, the mask is narrowed to that
    single action. ``noise`` is added to every unmasked entry.

    :param policy: callable applied to the flattened board state.
    :param board: current board.
    :param cuda: move the tensors to the GPU before use.
    :param noise: constant added to the weight of each unmasked action.
    :returns: the result of ``multinomial()`` on the masked weights.
    """
    # Flatten the board into a single-row float tensor for the network.
    flat_board = board.matrix().reshape(BOARD_ROWS * BOARD_COLS)
    state = torch.from_numpy(flat_board).float().unsqueeze(0)
    if cuda:
        state = state.cuda()
    probs = policy(Variable(state))

    # Build the mask: 1 for every allowed action, 0 elsewhere.
    n_actions = len(POSSIBLE_ACTIONS)
    allowed_actions = board.valid_actions()
    mask_np = np.zeros(n_actions, dtype=np.float32)
    for action in POSSIBLE_ACTIONS:
        if action in allowed_actions:
            mask_np[action] = 1

    # A winning move overrides everything else; when several exist, the
    # last one encountered is kept.
    for candidate in allowed_actions:
        outcome = board.insert(candidate).winner()
        if outcome == board.turn():
            mask_np = np.zeros(n_actions, dtype=np.float32)
            mask_np[candidate] = 1

    mask = Variable(torch.from_numpy(mask_np))
    noise_term = Variable(torch.from_numpy(mask_np * noise))
    if cuda:
        mask = mask.cuda()
        noise_term = noise_term.cuda()

    probs = probs * mask + noise_term
    allowed_mass = torch.sum(probs * mask).data[0]
    if allowed_mass < 1e-40:
        # The network put (practically) no weight on any allowed action,
        # so fall back to choosing uniformly among the unmasked ones.
        probs = probs + mask
    return probs.multinomial()
Пример #3
0
def test_insert_some_coins():
    """
    Insert coins one at a time and check turn, board state and valid actions.

    One coin goes into column 3, then six into column 2. After the sixth
    coin, column 2 must no longer be listed as a valid action.
    """
    all_columns = (0, 1, 2, 3, 4, 5, 6)
    # (column played, whose turn follows, expected value of column 2,
    #  expected valid actions)
    steps = (
        (3, 'X', 0, all_columns),
        (2, 'O', 0b10, all_columns),
        (2, 'X', 0b0110, all_columns),
        (2, 'O', 0b100110, all_columns),
        (2, 'X', 0b01100110, all_columns),
        (2, 'O', 0b1001100110, all_columns),
        (2, 'X', 0b011001100110, (0, 1, 3, 4, 5, 6)),
    )

    board = Board()
    assert board.turn() == 'O'
    for column, next_turn, column_two, open_columns in steps:
        board = board.insert(column)
        assert board.turn() == next_turn
        assert board == Board([0, 0, column_two, 0b01, 0, 0, 0])
        assert board.valid_actions() == open_columns
Пример #4
0
            click.echo(click.style(str(e), fg='red'))


# Interactive game loop: the computer (driven by `policy`) and a human
# (via `do_human_action`) take turns on the same board.
b = Board()

# Decide if computer is player 1 or 2
# Currently hard-coded to player 1, so the branch below never runs.
computer_player = Board.PLAYER_1

if computer_player == Board.PLAYER_2:
    # Computer is player two, let player one play first
    b = do_human_action(b)

while True:
    # Computer moves
    a = select_action(policy, b)
    # a.data[0][0] is the chosen action extracted from the returned tensor.
    b = b.insert(a.data[0][0])

    winner = b.winner()
    if winner:
        # Game over: show the final board, report the outcome and exit.
        print_board(b)
        if winner == computer_player:
            click.echo(click.style('Computer wins!', fg='green'))
        elif winner == '-':
            # '-' is reported as "Nobody wins!".
            click.echo(click.style('Nobody wins!', fg='red'))
        else:
            print("Invalid result")
        sys.exit(0)

    # Other player moves
    # NOTE(review): the visible code has no winner check after the human's
    # move; a human win would presumably only surface after the computer's
    # next move, as "Invalid result" - confirm against the full file.
    b = do_human_action(b)
Пример #5
0
def test_insert_coins_full():
    """
    Inserting into column 2 of this board must raise ValueError.

    The layout is the final one from test_insert_some_coins, where column 2
    is no longer among the valid actions.
    """
    board_with_full_column = Board([0, 0, 0b011001100110, 0b01, 0, 0, 0])
    with pytest.raises(ValueError):
        board_with_full_column.insert(2)