def test_ko_undo(self):
    gs, moves = parseboard.parse(". B . . . . .|"
                                 "B W B . . . .|"
                                 "W k W . . . .|"
                                 ". W . . . . .|"
                                 ". . . . . . .|"
                                 ". . . . a . .|"
                                 ". . . . . . .|")
    gs.set_current_player(go.BLACK)

    # Trigger ko at (1, 1)
    gs.do_move(moves['k'])
    ko = gs.get_ko_location()
    self.assertIsNotNone(ko)

    copy = gs.copy()
    self.equality_checks(gs, copy)

    with copy.try_stone(flatten_idx(moves['a'], gs.get_size())):
        self.inequality_checks(gs, copy)
        # Doing move at 'a' clears ko
        self.assertIsNone(copy.get_ko_location())

    self.equality_checks(gs, copy)
    # Undoing move at 'a' resets ko
    self.assertEqual(copy.get_ko_location(), ko)

def _select_moves_and_normalize(self, nn_output, moves, size):
    """helper function to normalize a distribution over the given list of moves
    and return a list of (move, prob) tuples
    """
    if len(moves) == 0:
        return []
    move_indices = [flatten_idx(m, size) for m in moves]
    # get network activations at legal move locations
    distribution = nn_output[move_indices]
    distribution = distribution / distribution.sum()
    # wrap in list() so a list is returned under Python 3 as well
    return list(zip(moves, distribution))

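# A minimal, hypothetical sketch of the normalization above on made-up values
# (not part of the original code). It assumes flatten_idx((x, y), size) is
# equivalent to x * size + y; all names here are illustrative only.
import numpy as np

def _demo_select_and_normalize():
    size = 7
    nn_output = np.random.rand(size * size)       # fake raw activations, one per point
    moves = [(0, 0), (3, 3), (6, 6)]              # hypothetical candidate moves
    indices = [x * size + y for (x, y) in moves]  # stand-in for flatten_idx
    distribution = nn_output[indices]
    distribution = distribution / distribution.sum()
    # after re-normalization the selected probabilities sum to 1
    assert abs(distribution.sum() - 1.0) < 1e-8
    return list(zip(moves, distribution))
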
def do_move(states, states_prev, moves, X_list, y_list, player_color):
    bsize_flat = bsize * bsize
    for st, st_prev, mv, X, y in zip(states, states_prev, moves, X_list, y_list):
        # Only do more moves if not end of game already
        if not st.is_end_of_game:
            st.do_move(mv)
            # do_move() flips current_player, so this condition holds exactly
            # when the move just played was made by player_color
            if st.current_player != player_color and mv is not go.PASS_MOVE:
                state_1hot = preprocessor.state_to_tensor(st_prev)
                # Convert move to one-hot
                move_1hot = np.zeros(bsize_flat)
                move_1hot[flatten_idx(mv, bsize)] = 1
                X.append(state_1hot)
                y.append(move_1hot)
    return states, X_list, y_list

def eval_state(self, state, moves=None):
    """Given a GameState object, returns a list of (action, probability) pairs
    according to the network outputs.

    If a list of moves is specified, only those moves are kept in the
    distribution.
    """
    tensor = self.preprocessor.state_to_tensor(state)
    # run the tensor through the network
    network_output = self.forward(tensor)
    moves = moves or state.get_legal_moves()
    move_indices = [flatten_idx(m, state.size) for m in moves]
    # get network activations at legal move locations; note that the raw
    # slice is not a proper distribution until re-normalized below
    distribution = network_output[0][move_indices]
    distribution = distribution / distribution.sum()
    # wrap in list() so a list is returned under Python 3 as well
    return list(zip(moves, distribution))

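# A small, hypothetical consumer of eval_state's output (not from the original
# source): greedy selection over the returned (move, probability) pairs.
def _greedy_move(move_prob_pairs):
    # pick the move with the largest re-normalized probability; returns None
    # when the pair list is empty, e.g. if no legal moves were supplied
    pairs = list(move_prob_pairs)
    if not pairs:
        return None
    return max(pairs, key=lambda pair: pair[1])[0]
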
def test_merge_and_capture_undo(self):
    gs, moves = parseboard.parse(". . B B B . .|"
                                 ". B W W W B .|"
                                 ". B W B W B .|"
                                 ". B W c W B .|"
                                 ". B W B W B .|"
                                 ". B W W W B .|"
                                 ". . B B B . .|")
    gs.set_current_player(go.BLACK)
    copy = gs.copy()

    # Initial equality checks
    self.assertTrue(copy.sanity_check_groups())
    self.equality_checks(gs, copy)

    with copy.try_stone(flatten_idx(moves['c'], gs.get_size())):
        self.assertTrue(copy.sanity_check_groups())
        self.inequality_checks(gs, copy)

    # Move should now be undone - retry equality checks from above
    self.assertTrue(copy.sanity_check_groups())
    self.equality_checks(gs, copy)

def test_hash_update_matches_actual_hash(self):
    gs, moves = parseboard.parse("a x b . . . .|"
                                 "z c d . . . .|"
                                 ". . . . . . .|"
                                 ". . . y . . .|"
                                 ". . . . . . .|"
                                 ". . . . . . .|"
                                 ". . . . . . .|")
    # a, b, c, d are black; x, y, z are white. 'x' appears twice in the move
    # order: the white stone at x is captured when black plays 'c', then
    # white re-plays the same point at the end.
    move_order = ['a', 'x', 'b', 'y', 'c', 'z', 'd', 'x']
    for m in move_order:
        move_1d = flatten_idx(moves[m], gs.get_size())

        # 'Try' move and get hash
        with gs.try_stone(move_1d):
            hash1 = gs.get_hash()

        # Actually do move and get hash
        gs.do_move(moves[m])
        hash2 = gs.get_hash()

        self.assertEqual(hash1, hash2)

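# The test above only passes if get_hash() is maintained incrementally in a way
# that agrees with recomputing from scratch. A common scheme for this is
# Zobrist hashing; the toy class below sketches that idea under that assumption
# and is not the codebase's actual implementation.
import random

class ZobristSketch:
    """Toy incremental board hash: XOR one random key per (point, color)."""

    def __init__(self, size, seed=0):
        rng = random.Random(seed)
        # one 64-bit key for each (flat point, color) pair; 0 = black, 1 = white
        self._keys = [[rng.getrandbits(64), rng.getrandbits(64)]
                      for _ in range(size * size)]
        self.hash = 0

    def toggle(self, flat_idx, color):
        # XOR is its own inverse, so placing a stone, capturing it, and
        # undoing a move all reuse this single operation
        self.hash ^= self._keys[flat_idx][color]
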
def _make_training_pair(st, mv, preprocessor):
    st_tensor = preprocessor.state_to_tensor(st)
    # Convert move to one-hot
    mv_tensor = np.zeros((1, st.get_size() * st.get_size()))
    mv_tensor[(0, flatten_idx(mv, st.get_size()))] = 1
    return (st_tensor, mv_tensor)

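# Hypothetical batching helper built on _make_training_pair (an assumption,
# not original code); `np` is numpy as in the function above, and states/moves
# are parallel lists of GameState objects and their chosen moves.
def _make_training_batch(states, moves, preprocessor):
    pairs = [_make_training_pair(st, mv, preprocessor)
             for st, mv in zip(states, moves)]
    # stack the single-example tensors along the batch axis
    X = np.concatenate([s for s, _ in pairs], axis=0)
    y = np.concatenate([m for _, m in pairs], axis=0)
    return X, y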