def reset(self):
    """Start a new game.

    Rebuilds the internal state tensor from scratch and clears the done
    flag, then hands back a defensive copy of the fresh state so callers
    cannot mutate the environment's own array.
    """
    fresh = gogame.init_state(self.size)
    self.state_ = fresh
    self.done = False
    return np.copy(fresh)
def __init__(self, size, komi=0, reward_method='real'):
    """Set up a Go environment on a `size` x `size` board.

    @param size: board side length
    @param komi: compensation points added for white
    @param reward_method: either 'heuristic' or 'real'
        heuristic: gives # black pieces - # white pieces.
        real: gives 0 for in-game move, 1 for winning, -1 for losing,
            0 for draw, all from black player's perspective
    """
    self.size = size
    self.komi = komi
    self.state_ = gogame.init_state(size)
    self.reward_method = RewardMethod(reward_method)
    self.done = False

    # Observation is the multi-channel board tensor; actions are the
    # flat move indices (board points plus pass).
    obs_shape = (govars.NUM_CHNLS, size, size)
    self.observation_space = gym.spaces.Box(
        np.float32(0), np.float32(govars.NUM_CHNLS), shape=obs_shape)
    self.action_space = gym.spaces.Discrete(gogame.action_size(self.state_))
def exceute_episode(self):
    """Play one complete self-play game and return its training samples.

    Each turn runs a fresh MCTS from the canonical (current-player)
    view of the board, records (canonical board, player, visit-count
    policy), then advances the game with the sampled action.  When the
    game ends, every recorded position is converted into a
    (board planes, action probabilities, reward) tuple, with the reward
    sign flipped to that position's player's perspective.
    """
    history = []
    player = 1
    board = gogame.init_state(self.args['boardSize'])

    while True:
        canonical = gogame.canonical_form(board)

        # Rebuild the search tree from scratch for this position.
        self.mcts = MCTS(self.game, self.model, self.args)
        root = self.mcts.run(self.model, canonical, to_play=1)

        # Policy target: normalized root visit counts over all moves
        # (board points plus the pass move).
        visits = [0] * (self.args['boardSize'] * self.args['boardSize'] + 1)
        for move, child in root.children.items():
            visits[move] = child.visit_count
        pi = visits / np.sum(visits)

        history.append((canonical, player, pi))

        chosen = root.select_action(temperature=1)
        board = gogame.next_state(board, chosen, canonical=False)
        player = -player

        if not gogame.game_ended(board):
            continue

        # Game over: score from the perspective of `player` (the side
        # to move after the final flip).
        outcome = gogame.winning(board) * player

        samples = []
        for past_board, past_player, past_pi in history:
            # Stack the black, white, and turn planes into a
            # (size, size, 3) nested list for the network.
            planes = np.array(
                [past_board[0], past_board[1], past_board[3]]
            ).transpose().tolist()
            # Flip the reward sign for positions where the recorded
            # player differs from the terminal perspective.
            signed = outcome * ((-1) ** (past_player != player))
            samples.append((planes, past_pi, signed))
        return samples
def getInitBoard(self):
    """Return a fresh initial board state (numpy array) for this board size."""
    initial = gogame.init_state(self.size)
    return initial