def value_policy(board: chess.Board):
    """Return the Stockfish evaluation of ``board`` and a softmax policy over its legal moves."""
    env = ChessEnv(board)
    game_over, score = env.is_game_over()
    if game_over:
        return score, []
    stockfish = Stockfish()
    value = stockfish.stockfish_eval(env.board, timeout=100)
    next_states = []
    for move in env.board.legal_moves:
        board_copy = env.board.copy()
        board_copy.push(move)
        next_states.append(board_copy)

    actions_value = []
    for state in next_states:
        actions_value.append(evaluate_state(state))

    policy = softmax(actions_value)

    index_list = [Config.MOVETOINDEX[move.uci()] for move in env.board.legal_moves]
    # Full move-index vector; only the legal moves receive probability mass.
    policy_map = np.zeros((5120,))
    for index, pi in zip(index_list, policy):
        policy_map[index] = pi
    assert policy.sum() > 0.999
    return value, policy_map
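# --- Hedged usage sketch (not from the original listing) ---
# A minimal way to exercise value_policy above. It assumes the project modules the function
# relies on (Config, ChessEnv, Stockfish) are importable and that a Stockfish binary is
# installed; the interpretation of the policy vector printed below is illustrative only.
import chess
import numpy as np

if __name__ == "__main__":
    start = chess.Board()
    value, policy_map = value_policy(start)
    print("Stockfish value of the start position:", value)
    # Only legal moves receive probability mass in the 5120-slot policy vector.
    best_index = int(np.argmax(policy_map))
    print("Highest-probability move index:", best_index, "p =", policy_map[best_index])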
Example #3
def pretrain(model):
    feature_batch = []
    targets_batch = []
    board_positions = get_board_position()
    shuffle(board_positions)
    print("Pretraining on {} board positions...".format(len(board_positions)))
    stockfish = Stockfish()

    for batch in range(Config.PRETRAIN_EPOCHS):
        for index, board_position in enumerate(board_positions):
            if (index + 1) % Config.minibatch_size != 0:
                feature_batch.append(board_to_feature(board_position))
                targets_batch.append(
                    stockfish.stockfish_eval(board_position, 10))
            else:
                feature_batch = torch.FloatTensor(feature_batch)
                targets_batch = Variable(torch.FloatTensor(targets_batch))
                do_backprop(feature_batch, targets_batch, model)
                feature_batch = []
                targets_batch = []
        print("Completed batch {} of {}".format(batch, Config.PRETRAIN_EPOCHS))
def pretrain(model, boards):

    iters = 0
    feature_batch = []
    targets_val_batch = []
    targets_pol_batch = []
    shuffle(boards)
    print("Pretraining on {} board positions...".format(len(boards)))
    stockfish = Stockfish()

    for batch in range(Config.PRETRAIN_EPOCHS):
        for index, board_position in enumerate(boards):
            if (index + 1) % Config.minibatch_size != 0:
                try:
                    value, policy, board = board_position
                except (TypeError, ValueError):
                    # Skip malformed entries instead of silently reusing stale values.
                    continue

                targets_pol_batch.append(policy)
                targets_val_batch.append(value)
                print(index)
                feature_batch.append(board_to_feature(board))

            else:
                feature_batch = torch.FloatTensor(feature_batch)
                targets_val_batch = Variable(
                    torch.FloatTensor(targets_val_batch))
                targets_pol_batch = Variable(
                    torch.FloatTensor(targets_pol_batch))
                do_backprop(feature_batch, targets_val_batch,
                            targets_pol_batch, model, iters)
                iters = iters + 1
                feature_batch = []
                targets_val_batch = []
                targets_pol_batch = []
        print("Completed batch {} of {}".format(batch, Config.PRETRAIN_EPOCHS))
class ChessEnv:
    def __init__(self, board=None):
        self.board = board
        self.num_halfmoves = 0
        self.winner = None  # type: Winner
        self.resigned = False
        self.result = None
        self.state_count = dict()
        self.stockfish = Stockfish()

    def reset(self):
        self.board = chess.Board()
        self.num_halfmoves = 0
        self.winner = None
        self.resigned = False

        # count first board state
        self.state_count = dict()
        transposition = self.board._transposition_key()
        self.state_count[transposition] = 1

        return self

    def update(self, board):
        self.board = chess.Board(board)
        self.winner = None
        self.resigned = False
        return self

    @property
    def done(self):
        return self.winner is not None

    @property
    def white_won(self):
        return self.winner == Winner.white

    @property
    def white_to_move(self):
        return self.board.turn == chess.WHITE

    @property
    def repetition(self):
        return self.state_count[self.board._transposition_key()]

    def step(self, action: str, check_over=True):
        """
        :param action:
        :param check_over:
        :return:
        """
        if check_over and action is None:
            self._resign()
            return
        self.board.push_uci(action)
        self.update_state_count()

        self.num_halfmoves += 1

        if check_over and self.board.result(claim_draw=True) != "*":
            #    print('Board resultd')
            #    print(self.board.result(claim_draw=True))
            self._game_over()

    def _game_over(self):
        if self.winner is None:
            self.result = self.board.result(claim_draw=True)
            if self.result == '1-0':
                self.winner = Winner.white
            elif self.result == '0-1':
                self.winner = Winner.black
            else:
                self.winner = Winner.draw

    def is_game_over(self,
                     moves=0,
                     res_check=False,
                     testing_flag=False) -> tuple:
        if testing_flag:
            return True, 0
        if self.board.is_game_over():
            score = self.board.result()
            # print(score)
            if score == '0-1':
                return True, -Config.GAME_SCORE
            if score == '1/2-1/2':
                return True, 0
            if score == '1-0':
                return True, Config.GAME_SCORE
        elif (moves > Config.RESIGN_CHECK_MIN) and (
                not moves % Config.RESIGN_CHECK_FREQ) and res_check:
            return self.stockfish.check_resignation(self.board)
        return False, None

    def _resign(self):
        self.resigned = True
        if self.white_to_move:  # WHITE RESIGNED!
            self.winner = Winner.black
            self.result = "0-1"
        else:
            self.winner = Winner.white
            self.result = "1-0"

    def adjudicate(self):
        score = self.testeval(absolute=True)
        if abs(score) < 0.01:
            self.winner = Winner.draw
            self.result = "1/2-1/2"
        elif score > 0:
            self.winner = Winner.white
            self.result = "1-0"
        else:
            self.winner = Winner.black
            self.result = "0-1"

    def ending_average_game(self):
        self.winner = Winner.draw
        self.result = "1/2-1/2"

    def copy(self):
        env = copy.copy(self)
        env.board = copy.copy(self.board)
        return env

    def render(self):
        print("\n")
        print(self.board)
        print("\n")

    @property
    def observation(self):
        return self.board.fen()

    # Returns the list of legal moves as chess.Move objects.
    @property
    def legal_moves(self):
        return list(self.board.legal_moves)

    def deltamove(self, fen_next):
        moves = list(self.board.legal_moves)
        for mov in moves:
            self.board.push(mov)
            fee = self.board.fen()
            self.board.pop()
            if fee == fen_next:
                return mov.uci()
        return None

    def replace_tags(self):
        return replace_tags_board(self.board.fen())

    def canonical_input_planes(self):
        return canon_input_planes(self.board.fen())

    def testeval(self, absolute=False) -> float:
        return testeval(self.board.fen(), absolute)

    # def get_planes(self):
    #     move_count_plane = np.full((8,8), self.num_halfmoves, dtype=np.float32)
    #     player_colour_plane = np.full((8,8),(self.num_halfmoves%2)+1,dtype = np.float32) # 1 when white, 0 when black
    #
    #     piece_planes, aux_planes = canonical_input_planes()
    #     rep_planes = repetition_planes(self)
    #     curr_planes = np.vstack((piece_planes,rep_planes,player_colour_plane,move_count_plane,aux_planes))
    #     assert curr_planes.shape == (21,8,8)
    #     return curr_planes

    # Returns 2 planes encoding whether the current state has occurred once or twice.
    def repetition_planes(self):
        state = self.board._transposition_key()
        if self.state_count[state] == 1:
            rep1 = np.full([8, 8], 1, dtype=np.float32)
            rep2 = np.full([8, 8], 0, dtype=np.float32)

        elif self.state_count[state] == 2:
            rep1 = np.full([8, 8], 1, dtype=np.float32)
            rep2 = np.full([8, 8], 1, dtype=np.float32)

        else:
            rep1 = np.full([8, 8], 0, dtype=np.float32)
            rep2 = np.full([8, 8], 0, dtype=np.float32)
        reps = np.stack((rep1, rep2))  # stack into shape (2, 8, 8); vstack would give (16, 8)
        assert reps.shape == (2, 8, 8)
        return reps

    def update_state_count(self):
        state = self.board._transposition_key()
        if state in self.state_count:
            self.state_count[state] += 1
        else:
            self.state_count[state] = 1
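# --- Hedged usage sketch (not from the original listing) ---
# A short random-play loop that uses only methods defined on ChessEnv above. It assumes a
# Stockfish binary is available, because ChessEnv.__init__ constructs a Stockfish() helper;
# the function name random_game is hypothetical.
import random


def random_game(max_halfmoves=40):
    env = ChessEnv().reset()
    for _ in range(max_halfmoves):
        over, _score = env.is_game_over(moves=env.num_halfmoves)
        if over:
            break
        move = random.choice(env.legal_moves)  # legal_moves yields chess.Move objects
        env.step(move.uci())                   # step() expects a UCI string
    env.render()
    return env.result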
Example #6
import time
import numpy as np
import torch

from config import Config
from game.chess_env import ChessEnv
from game.features import board_to_feature
from network.policy_network import PolicyValNetwork_Giraffe
from game.stockfish import Stockfish
stockfish = Stockfish()


class Node(object):
    '''Represents and stores the statistics of each node in the search tree.

    Arguments:
        env: ChessEnv object holding the position at this node,
        explore_factor: float hyperparameter controlling the range of exploration,
        init_W: initial cumulative action-value 'W',
        init_N: initial visit count per action,
        init_P: initial probability distribution over the action space,
        parent: parent node, another instance of the Node class,
        child_id: index of the best child.
    '''
    def __init__(self,
                 env: ChessEnv,
                 explore_factor,
                 init_W=np.zeros((Config.d_out, )),
                 init_N=np.zeros((Config.d_out, )),
                 init_P=np.ones((Config.d_out, )) * (1 / Config.d_out),
                 parent=None,
                 child_id=None):