def value_policy(board: chess.Board):
    env = ChessEnv(board)
    game_over, score = env.is_game_over()
    if game_over:
        return score, []

    # Stockfish provides the scalar value target for this position.
    stockfish = Stockfish()
    value = stockfish.stockfish_eval(env.board, timeout=100)

    # Evaluate every successor position to build a policy over the legal moves.
    next_states = []
    for move in env.board.legal_moves:
        board_copy = env.board.copy()
        board_copy.push(move)
        next_states.append(board_copy)

    actions_value = []
    for state in next_states:
        actions_value.append(evaluate_state(state))
    policy = softmax(actions_value)

    # Scatter the move probabilities into the fixed 5120-dimensional action space.
    index_list = [Config.MOVETOINDEX[move.uci()] for move in env.board.legal_moves]
    move_probabilities = np.zeros((5120,))
    for index, pi in zip(index_list, policy):
        move_probabilities[index] = pi
    assert policy.sum() > 0.999
    return value, move_probabilities
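# Usage sketch (not part of the original module): assumes python-chess, a working
# Stockfish binary, and the evaluate_state/softmax/Config helpers are importable.
if __name__ == "__main__":
    start_board = chess.Board()
    value, pi = value_policy(start_board)
    print("position value:", value)
    print("policy mass over legal moves:", pi.sum())  # should be close to 1.0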
def pretrain(model):
    feature_batch = []
    targets_batch = []
    board_positions = get_board_position()
    shuffle(board_positions)
    print("Pretraining on {} board positions...".format(len(board_positions)))
    stockfish = Stockfish()
    for batch in range(Config.PRETRAIN_EPOCHS):
        for index, board_position in enumerate(board_positions):
            if (index + 1) % Config.minibatch_size != 0:
                # Accumulate features and Stockfish evaluations until a minibatch is full.
                feature_batch.append(board_to_feature(board_position))
                targets_batch.append(stockfish.stockfish_eval(board_position, 10))
            else:
                feature_batch = torch.FloatTensor(feature_batch)
                targets_batch = Variable(torch.FloatTensor(targets_batch))
                do_backprop(feature_batch, targets_batch, model)
                feature_batch = []
                targets_batch = []
        print("Completed batch {} of {}".format(batch, Config.PRETRAIN_EPOCHS))
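# Hypothetical invocation of the value-only pretraining loop above (a sketch, not
# part of the original code; assumes PolicyValNetwork_Giraffe can be constructed
# with no required arguments).
if __name__ == "__main__":
    pretrain(PolicyValNetwork_Giraffe())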
def pretrain(model, boards):
    iters = 0
    feature_batch = []
    targets_val_batch = []
    targets_pol_batch = []
    shuffle(boards)
    print("Pretraining on {} board positions...".format(len(boards)))
    stockfish = Stockfish()
    for batch in range(Config.PRETRAIN_EPOCHS):
        for index, board_position in enumerate(boards):
            if (index + 1) % Config.minibatch_size != 0:
                try:
                    value, policy, board = board_position
                except (TypeError, ValueError):
                    # Skip malformed entries rather than silently reusing stale targets.
                    continue
                targets_pol_batch.append(policy)
                targets_val_batch.append(value)
                feature_batch.append(board_to_feature(board))
            else:
                feature_batch = torch.FloatTensor(feature_batch)
                targets_val_batch = Variable(torch.FloatTensor(targets_val_batch))
                targets_pol_batch = Variable(torch.FloatTensor(targets_pol_batch))
                do_backprop(feature_batch, targets_val_batch, targets_pol_batch, model, iters)
                iters = iters + 1
                feature_batch = []
                targets_val_batch = []
                targets_pol_batch = []
        print("Completed batch {} of {}".format(batch, Config.PRETRAIN_EPOCHS))
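# Hypothetical call site for the value/policy pretraining above (a sketch, not
# part of the original code): each element of `boards` is a (value, policy, board)
# tuple, here produced by value_policy; real training data would come from a much
# larger set of positions. Assumes PolicyValNetwork_Giraffe needs no constructor
# arguments.
if __name__ == "__main__":
    labelled = []
    for b in [chess.Board()]:
        value, policy = value_policy(b)
        labelled.append((value, policy, b))
    pretrain(PolicyValNetwork_Giraffe(), labelled)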
class ChessEnv:
    def __init__(self, board=None):
        self.board = board
        self.num_halfmoves = 0
        self.winner = None  # type: Winner
        self.resigned = False
        self.result = None
        self.state_count = dict()
        self.stockfish = Stockfish()

    def reset(self):
        self.board = chess.Board()
        self.num_halfmoves = 0
        self.winner = None
        self.resigned = False
        # count first board state
        self.state_count = dict()
        transposition = self.board._transposition_key()
        self.state_count[transposition] = 1
        return self

    def update(self, board):
        self.board = chess.Board(board)
        self.winner = None
        self.resigned = False
        return self

    @property
    def done(self):
        return self.winner is not None

    @property
    def white_won(self):
        return self.winner == Winner.white

    @property
    def white_to_move(self):
        return self.board.turn == chess.WHITE

    @property
    def repetition(self):
        return self.state_count[self.board._transposition_key()]

    def step(self, action: str, check_over=True):
        """Apply a move given in UCI format; a None action is treated as a resignation."""
        if check_over and action is None:
            self._resign()
            return

        self.board.push_uci(action)
        self.update_state_count()
        self.num_halfmoves += 1

        if check_over and self.board.result(claim_draw=True) != "*":
            self._game_over()

    def _game_over(self):
        if self.winner is None:
            self.result = self.board.result(claim_draw=True)
            if self.result == '1-0':
                self.winner = Winner.white
            elif self.result == '0-1':
                self.winner = Winner.black
            else:
                self.winner = Winner.draw

    def is_game_over(self, moves=0, res_check=False, testing_flag=False) -> tuple:
        if testing_flag:
            return True, 0
        if self.board.is_game_over():
            score = self.board.result()
            if score == '0-1':
                return True, -Config.GAME_SCORE
            if score == '1/2-1/2':
                return True, 0
            if score == '1-0':
                return True, Config.GAME_SCORE
        elif (moves > Config.RESIGN_CHECK_MIN) and (not moves % Config.RESIGN_CHECK_FREQ) and res_check:
            return self.stockfish.check_resignation(self.board)
        return False, None

    def _resign(self):
        self.resigned = True
        if self.white_to_move:  # white resigned
            self.winner = Winner.black
            self.result = "0-1"
        else:
            self.winner = Winner.white
            self.result = "1-0"

    def adjudicate(self):
        score = self.testeval(absolute=True)
        if abs(score) < 0.01:
            self.winner = Winner.draw
            self.result = "1/2-1/2"
        elif score > 0:
            self.winner = Winner.white
            self.result = "1-0"
        else:
            self.winner = Winner.black
            self.result = "0-1"

    def ending_average_game(self):
        self.winner = Winner.draw
        self.result = "1/2-1/2"

    def copy(self):
        env = copy.copy(self)
        env.board = copy.copy(self.board)
        return env

    def render(self):
        print("\n")
        print(self.board)
        print("\n")

    @property
    def observation(self):
        return self.board.fen()

    # Returns the list of legal moves as chess.Move objects.
    @property
    def legal_moves(self):
        return list(self.board.legal_moves)

    def deltamove(self, fen_next):
        moves = list(self.board.legal_moves)
        for mov in moves:
            self.board.push(mov)
            fee = self.board.fen()
            self.board.pop()
            if fee == fen_next:
                return mov.uci()
        return None

    def replace_tags(self):
        return replace_tags_board(self.board.fen())

    def canonical_input_planes(self):
        return canon_input_planes(self.board.fen())

    def testeval(self, absolute=False) -> float:
        return testeval(self.board.fen(), absolute)

    # def get_planes(self):
    #     move_count_plane = np.full((8, 8), self.num_halfmoves, dtype=np.float32)
    #     player_colour_plane = np.full((8, 8), (self.num_halfmoves % 2) + 1, dtype=np.float32)  # 1 when white, 0 when black
    #     piece_planes, aux_planes = canonical_input_planes()
    #     rep_planes = repetition_planes(self)
    #     curr_planes = np.vstack((piece_planes, rep_planes, player_colour_plane, move_count_plane, aux_planes))
    #     assert curr_planes.shape == (21, 8, 8)
    #     return curr_planes

    # Returns two 8x8 planes, one for each repetition of the current state.
    def repetition_planes(self):
        state = self.board._transposition_key()
        if self.state_count[state] == 1:
            rep1 = np.full([8, 8], 1, dtype=np.float32)
            rep2 = np.full([8, 8], 0, dtype=np.float32)
        elif self.state_count[state] == 2:
            rep1 = np.full([8, 8], 1, dtype=np.float32)
            rep2 = np.full([8, 8], 1, dtype=np.float32)
        else:
            rep1 = np.full([8, 8], 0, dtype=np.float32)
            rep2 = np.full([8, 8], 0, dtype=np.float32)
        reps = np.stack((rep1, rep2))
        assert reps.shape == (2, 8, 8)
        return reps

    def update_state_count(self):
        state = self.board._transposition_key()
        if state in self.state_count:
            self.state_count[state] += 1
        else:
            self.state_count[state] = 1
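# Quick usage sketch of ChessEnv (not part of the original module; assumes
# python-chess and the Stockfish wrapper are importable): play one opening move
# and inspect the resulting state.
if __name__ == "__main__":
    env = ChessEnv().reset()
    env.step("e2e4")
    print(env.observation)            # FEN string after 1.e4
    print(env.done, env.repetition)   # False, 1 for a fresh position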
import time

import numpy as np
import torch

from config import Config
from game.chess_env import ChessEnv
from game.features import board_to_feature
from network.policy_network import PolicyValNetwork_Giraffe
from game.stockfish import Stockfish

stockfish = Stockfish()


class Node(object):
    """Represent and store the statistics of each node in the search tree.

    Arguments:
        env: ChessEnv object for the current position.
        explore_factor: float hyperparameter controlling the range of exploration.
        init_W: initial cumulative value function W.
        init_N: initial visit counter over the action space.
        init_P: initial probability distribution over the action space.
        parent: parent node, another instance of Node.
        child_id: index of the best child.
    """

    def __init__(self, env: ChessEnv, explore_factor,
                 init_W=np.zeros((Config.d_out,)),
                 init_N=np.zeros((Config.d_out,)),
                 init_P=np.ones((Config.d_out,)) * (1 / Config.d_out),
                 parent=None, child_id=None):