Exemplo n.º 1
0
    def test_get_liberties(self):
        """Check the 8-plane "liberties" feature against hand-coded values.

        Plane k of the expectation marks stones whose group has k + 1
        liberties on the simple board.
        """
        state = simple_board()
        preprocessor = Preprocess(["liberties"])
        # move the feature-plane axis last so planes are indexed as [x, y, k]
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        # todo - test liberties when > 8

        expected = np.zeros((state.size, state.size, 8))

        # one liberty: the black piece at (4,4), whose only liberty is (4,3)
        expected[4, 4, 0] = 1

        # two liberties: the black group in the top left corner ...
        expected[0, 0:3, 1] = 1
        # ... and the white pieces on the left and right of the eye
        for x in (3, 5):
            expected[x, 4, 1] = 1

        # three liberties: the white group in the top left corner ...
        expected[1, 0:2, 2] = 1
        # ... the white piece at (4,5) ...
        expected[4, 5, 2] = 1
        # ... and the black pieces on the sides of the eye
        for x in (3, 5):
            expected[x, 3, 2] = 1

        # four liberties: the black piece at (4,2)
        expected[4, 2, 3] = 1

        # compare plane by plane so a failure names the liberty count
        for plane in range(8):
            plane_matches = np.all(
                planes[:, :, plane] == expected[:, :, plane])
            self.assertTrue(
                plane_matches,
                "bad expectation: stones with %d liberties" % (plane + 1))
Exemplo n.º 2
0
    def test_get_legal(self):
        """The "legal" plane must be 1 exactly where a move is legal."""
        state = simple_board()
        tensor = Preprocess(["legal"]).state_to_tensor(state)
        # a single feature plane: index it directly, no transpose needed
        legal_plane = tensor[0, 0]

        expected = np.zeros((state.size, state.size))
        for (i, j) in state.get_legal_moves():
            expected[i, j] = 1

        self.assertTrue(np.all(expected == legal_plane))
Exemplo n.º 3
0
    def test_get_self_atari_size(self):
        """Check the "self_atari_size" planes on the self-atari board."""
        state = self_atari_board()
        preprocessor = Preprocess(["self_atari_size"])
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        expected = np.zeros((state.size, state.size, 8))
        # (x, y, plane) entries; plane k stands for a self atari of size k + 1:
        #   size 1 at position 0,0 and size 3 at position 3,4
        for x, y, plane in ((0, 0, 0), (3, 4, 2)):
            expected[x, y, plane] = 1

        self.assertTrue(np.all(planes == expected))
Exemplo n.º 4
0
    def test_get_sensibleness(self):
        """The "sensibleness" plane marks legal moves that are not white eyes."""
        # TODO - there are no legal eyes at the moment

        state = simple_board()
        tensor = Preprocess(["sensibleness"]).state_to_tensor(state)
        # a single feature plane: index it directly, no transpose needed
        sensible_plane = tensor[0, 0]

        expected = np.zeros((state.size, state.size))
        for (x, y) in state.get_legal_moves():
            if state.is_eye((x, y), go.WHITE):
                # filling an eye is legal but not counted as sensible
                continue
            expected[x, y] = 1

        self.assertTrue(np.all(expected == sensible_plane))
Exemplo n.º 5
0
    def test_get_self_atari_size_cap(self):
        """Check the "self_atari_size" planes on the capture board."""
        state = capture_board()
        preprocessor = Preprocess(["self_atari_size"])
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        expected = np.zeros((state.size, state.size, 8))
        marked = (
            (4, 5, 0),  # self atari of size 1 at the ko position ...
            (3, 6, 0),  # ... and just below it
            (6, 6, 2),  # self atari of size 3 at bottom corner
        )
        for x, y, plane in marked:
            expected[x, y, plane] = 1

        self.assertTrue(np.all(planes == expected))
Exemplo n.º 6
0
    def test_get_ladder_capture(self):
        """Only the point labelled 'a' may be flagged as a ladder capture."""
        layout = (". . . . . . .|"
                  "B W a . . . .|"
                  ". B . . . . .|"
                  ". . . . . . .|"
                  ". . . . . . .|"
                  ". . . . . W .|")
        state, labelled = parseboard.parse(layout)

        tensor = Preprocess(["ladder_capture"]).state_to_tensor(state)
        # a single feature plane: index it directly, no transpose needed
        ladder_plane = tensor[0, 0]

        expected = np.zeros((state.size, state.size))
        expected[labelled['a']] = 1

        self.assertTrue(np.all(expected == ladder_plane))
Exemplo n.º 7
0
def run(player_RL, player_SL, out_pth, n_training_pairs, batch_size,
        bd_size, features):
    """Generate self-play training data and store it in an HDF5 file.

    Batches are produced by play_batch() and appended to the datasets
    returned by init_hdf5(). Data is written to a hidden ".tmp." file next
    to out_pth, which is renamed to out_pth only after every batch has been
    saved and the file has been closed.

    Arguments:
        - player_RL, player_SL : players forwarded unchanged to play_batch()
        - out_pth : final path of the HDF5 file
        - n_training_pairs : batches are played until the batch count
            reaches n_training_pairs / batch_size (at least one batch is
            always played)
        - batch_size : number of games, and rows appended, per batch
        - bd_size : board side length used for the dataset shape
        - features : feature list handed to Preprocess

    Any exception raised while saving a batch is re-raised after a warning.
    The HDF5 file is closed in every case; on failure the temporary file is
    left in place and out_pth is not created.
    """
    n_features = Preprocess(features).output_dim
    tmp_file = os.path.join(os.path.dirname(out_pth), ".tmp." + os.path.basename(out_pth))
    h5f = h5py.File(tmp_file, 'w')

    try:
        h5_states, h5_winners = init_hdf5(out_pth, n_features,
                                          bd_size, h5f)
        next_idx = 0
        n_batches = 0
        while True:
            X, winners = play_batch(player_RL, player_SL, batch_size,
                                    features)
            # play_batch yields X=None when no training position was recorded
            if X is not None:
                try:
                    # grow both datasets by one batch, then fill the new rows
                    h5_states.resize((next_idx + batch_size, n_features, bd_size, bd_size))
                    h5_winners.resize((next_idx + batch_size, 1))
                    h5_states[next_idx:] = X
                    h5_winners[next_idx:] = winners
                    next_idx += batch_size
                except Exception:
                    warnings.warn("Unknown error occured during batch save to HDF5 "
                        "file: {}".format(out_pth))
                    # bare raise keeps the original traceback
                    raise
            # batches without data still count towards the total
            n_batches += 1
            if n_batches >= n_training_pairs / batch_size:
                break
    finally:
        # release the file handle on success and on failure alike
        h5f.close()
    os.rename(tmp_file, out_pth)
Exemplo n.º 8
0
    def test_get_turns_since(self):
        """Check the "turns_since" planes against the move history.

        A stone whose most recent move lies `age` moves back in the history
        is expected in plane min(age, 7).
        """
        state = simple_board()
        preprocessor = Preprocess(["turns_since"])
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        expected = np.zeros((state.size, state.size, 8))

        # newest move first, so index() returns the age of the latest move
        newest_first = state.history[::-1]

        for x in range(state.size):
            for y in range(state.size):
                if state.board[x, y] == go.EMPTY:
                    continue
                turns_ago = newest_first.index((x, y))
                expected[x, y, min(turns_ago, 7)] = 1

        self.assertTrue(np.all(planes == expected))
Exemplo n.º 9
0
    def test_get_ladder_escape(self):
        """Only the point labelled 'a' may be flagged as a ladder escape."""
        # On this board, playing at 'a' is ladder escape because there is a
        # breaker on the right.
        layout = (". B B . . . .|"
                  "B W a . . . .|"
                  ". B . . . . .|"
                  ". . . . . W .|"
                  ". . . . . . .|"
                  ". . . . . . .|")
        state, labelled = parseboard.parse(layout)
        preprocessor = Preprocess(["ladder_escape"])
        # the feature is evaluated with white to move
        state.current_player = go.WHITE

        tensor = preprocessor.state_to_tensor(state)
        # a single feature plane: index it directly, no transpose needed
        escape_plane = tensor[0, 0]

        expected = np.zeros((state.size, state.size))
        expected[labelled['a']] = 1

        self.assertTrue(np.all(expected == escape_plane))
Exemplo n.º 10
0
    def test_get_capture_size(self):
        """Check the "capture_size" planes on the capture board.

        The expectation is derived by playing every legal move on a copy of
        the state and counting how much num_white_prisoners grows.
        """
        state = capture_board()
        preprocessor = Preprocess(["capture_size"])
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        prisoners_before = state.num_white_prisoners
        expected = np.zeros((state.size, state.size, 8))
        for (x, y) in state.get_legal_moves():
            # play the move on a scratch copy so `state` stays untouched
            after_move = state.copy()
            after_move.do_move((x, y))
            captured = after_move.num_white_prisoners - prisoners_before
            # plane k <-> k stones captured; the last plane collects 7 or more
            expected[x, y, min(7, captured)] = 1

        # compare plane by plane so a failure names the capture size
        for plane in range(8):
            plane_matches = np.all(
                planes[:, :, plane] == expected[:, :, plane])
            self.assertTrue(
                plane_matches,
                "bad expectation: capturing %d stones" % plane)
Exemplo n.º 11
0
    def test_get_liberties_after_cap(self):
        """Variant of test_get_liberties_after on a board where captures
        are imminent.
        """
        state = capture_board()
        preprocessor = Preprocess(["liberties_after"])
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        expected = np.zeros((state.size, state.size, 8))

        for (x, y) in state.get_legal_moves():
            # play the move on a scratch copy and read the new liberty count
            after_move = state.copy()
            after_move.do_move((x, y))
            liberties = after_move.liberty_counts[x, y]
            # plane k <-> k + 1 liberties; the last plane collects 8 or more
            plane = min(liberties - 1, 7)
            expected[x, y, plane] = 1

        # compare plane by plane so a failure names the liberty count
        for plane in range(8):
            plane_matches = np.all(
                planes[:, :, plane] == expected[:, :, plane])
            self.assertTrue(
                plane_matches,
                "bad expectation: stones with %d liberties after move" %
                (plane + 1))
Exemplo n.º 12
0
    def test_get_board(self):
        """Check the three "board" planes against hand-coded stone maps."""
        state = simple_board()
        preprocessor = Preprocess(["board"])
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        white_stones = np.array([
            [0, 0, 0, 0, 0, 0, 0],
            [1, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 1, 0],
            [0, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 0, 0],
        ])
        black_stones = np.array([
            [1, 1, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 1, 0, 0, 0],
            [0, 0, 1, 0, 1, 0, 0],
            [0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0],
        ])
        # every point holding neither a white nor a black stone is empty
        occupied = white_stones + black_stones
        empty_points = np.ones((state.size, state.size)) - occupied

        # check number of planes
        self.assertEqual(planes.shape, (state.size, state.size, 3))

        # check return value against hand-coded expectation
        # (given that current_player is white)
        expected = np.dstack((white_stones, black_stones, empty_points))
        self.assertTrue(np.all(planes == expected))
Exemplo n.º 13
0
    def test_get_liberties_after(self):
        """Check the 8-plane "liberties_after" feature on the simple board.

        The expectation is derived by playing every legal move on a copy of
        the state and reading the liberty count of the stone just placed.
        Plane k stands for k + 1 liberties; the last plane collects 8 or
        more.
        """
        gs = simple_board()
        pp = Preprocess(["liberties_after"])
        feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0))

        one_hot_liberties = np.zeros((gs.size, gs.size, 8))

        # TODO (?) hand-code?
        for (x, y) in gs.get_legal_moves():
            # play the move on a scratch copy so `gs` stays untouched
            after_move = gs.copy()
            after_move.do_move((x, y))
            libs = after_move.liberty_counts[x, y]
            # Clamp to the last plane. The previous `libs < 7` test sent a
            # stone with exactly 7 liberties to plane 7 instead of plane 6,
            # unlike test_get_liberties_after_cap.
            one_hot_liberties[x, y, min(libs - 1, 7)] = 1

        # compare plane by plane so a failure names the liberty count
        for i in range(8):
            self.assertTrue(
                np.all(feature[:, :, i] == one_hot_liberties[:, :, i]),
                "bad expectation: stones with %d liberties after move" %
                (i + 1))
Exemplo n.º 14
0
    def test_feature_concatenation(self):
        """Features requested together must be stacked in request order."""
        state = simple_board()
        preprocessor = Preprocess(["board", "sensibleness", "capture_size"])
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        expected = np.zeros((state.size, state.size, 3 + 1 + 8))

        # first three planes: board (white, black, empty)
        for plane, color in enumerate((go.WHITE, go.BLACK, go.EMPTY)):
            expected[:, :, plane] = (state.board == color) * 1

        for (x, y) in state.get_legal_moves():
            # 4th plane: sensibleness (as in test_get_sensibleness)
            if not state.is_eye((x, y), go.WHITE):
                expected[x, y, 3] = 1
            # 5th through 12th plane: capture size (all zero-capture)
            expected[x, y, 4] = 1

        self.assertTrue(np.all(expected == planes))
Exemplo n.º 15
0
    def __init__(self, feature_list, **kwargs):
        """Set up the feature preprocessor and, unless disabled, the network.

        feature_list is handed to Preprocess; the preprocessor's output_dim
        is stored in kwargs as "input_dim" before kwargs are forwarded to
        the subclass' create_network().

        Optional keyword argument: init_network (boolean, default True). If
        set to False, self.model and self.forward are not initialized and
        the calling function should set them.
        """
        self.preprocessor = Preprocess(feature_list)
        kwargs["input_dim"] = self.preprocessor.output_dim

        if not kwargs.get('init_network', True):
            # caller is responsible for self.model and self.forward
            return

        # go through self.__class__ so that subclasses only need to
        # override create_network()
        network_factory = self.__class__.create_network
        self.model = network_factory(**kwargs)

        # self.forward wraps a Keras function built from self.model
        self.forward = self._model_forward()
Exemplo n.º 16
0
 def __init__(self, features):
     """Create the feature preprocessor and record its output dimension."""
     processor = Preprocess(features)
     self.feature_processor = processor
     self.n_features = processor.output_dim
Exemplo n.º 17
0
class HDF5_converter:
    """Convert SGF game records into an HDF5 file of states and actions."""

    def __init__(self, features):
        """Create the feature preprocessor and record its output dimension."""
        self.feature_processor = Preprocess(features)
        self.n_features = self.feature_processor.output_dim

    def convert_sgf_to_gamestate_iter(self, file_name, board_size):
        """Read the given SGF file into an iterable of (input,output) pairs for neural network training

        Each input is a GameState converted into one-hot neural net features
        Each output is an action as an (x,y) pair (passes are skipped)
        If this game's size does not match board_size, a SizeMismatchError is raised
        """
        # the file content is read in full here, so the handle can be
        # closed before the states are iterated
        with open(file_name, 'r') as file_object:
            state_action_iterator = sgf_iter_states(file_object.read(), include_end=False)

        for (state, move, player) in state_action_iterator:
            if state.size != board_size:
                raise SizeMismatchError()
            if move != go.PASS_MOVE:
                nn_input = self.feature_processor.state_to_tensor(state)
                yield (nn_input, move)

    # TODO - also save feature list
    def sgfs_to_hdf5(self, sgf_files, hdf5_file, board_size=19, ignore_errors=True, verbose=False):
        """Convert all files in the iterable sgf_files into an hdf5 group to be stored in hdf5_file

        Arguments:
            - sgf_files : an iterable of relative or absolute paths to SGF files
            - hdf5_file : the name of the HDF5 where features will be saved
            - board_size : side length of board of games that are loaded

            - ignore_errors : if True, issues a Warning when there is an unknown
                exception rather than halting. Note that sgf.ParseException and
                go.IllegalMove exceptions are always skipped
            - verbose : if True, print progress information

        The resulting file has the following properties:
            states  : dataset with shape (n_data, n_features, board width, board height)
            actions : dataset with shape (n_data, 2) (actions are stored as x,y tuples of
                      where the move was played)
            file_offsets : group mapping from filenames to tuples of (index, length)

        For example, to find what positions in the dataset come from 'test.sgf':
            index, length = file_offsets['test.sgf']
            test_states = states[index:index+length]
            test_actions = actions[index:index+length]

        If the conversion fails, the temporary file is closed and removed and
        the exception is re-raised; hdf5_file is not created in that case.
        """

        # make a hidden temporary file in case of a crash.
        # on success, this is renamed to hdf5_file
        tmp_file = os.path.join(os.path.dirname(hdf5_file), ".tmp." + os.path.basename(hdf5_file))
        h5file = h5.File(tmp_file, 'w')

        try:
            # see http://docs.h5py.org/en/latest/high/group.html#Group.create_dataset
            # NOTE(review): both datasets start with one row and only grow,
            # so a run that extracts no pairs still leaves one placeholder
            # row in the file - confirm this is acceptable to readers.
            states = h5file.require_dataset(
                'states',
                dtype=np.uint8,
                shape=(1, self.n_features, board_size, board_size),
                maxshape=(None, self.n_features, board_size, board_size),  # 'None' == arbitrary size
                exact=False,  # allow non-uint8 datasets to be loaded, coerced to uint8
                chunks=(64, self.n_features, board_size, board_size),  # approximately 1MB chunks
                compression="lzf")

            actions = h5file.require_dataset(
                'actions',
                dtype=np.uint8,
                shape=(1, 2),
                maxshape=(None, 2),
                exact=False,
                chunks=(1024, 2),
                compression="lzf")

            # 'file_offsets' is an HDF5 group so that 'file_name in file_offsets' is fast
            file_offsets = h5file.require_group('file_offsets')

            if verbose:
                print("created HDF5 dataset in {}".format(tmp_file))

            next_file_index = 0
            for file_name in sgf_files:
                if verbose:
                    print(file_name)
                # count number of state/action pairs yielded by this game
                n_pairs = 0
                start_file_index = next_file_index
                try:
                    for state, move in self.convert_sgf_to_gamestate_iter(file_name, board_size):
                        # grow the datasets one row at a time as needed
                        if next_file_index >= len(states):
                            states.resize((next_file_index + 1, self.n_features, board_size, board_size))
                            actions.resize((next_file_index + 1, 2))
                        states[next_file_index] = state
                        actions[next_file_index] = move
                        n_pairs += 1
                        next_file_index += 1
                except go.IllegalMove:
                    warnings.warn("Illegal Move encountered in %s\n"
                                  "\tdropping the remainder of the game" % file_name)
                except sgf.ParseException:
                    warnings.warn("Could not parse %s\n\tdropping game" % file_name)
                except SizeMismatchError:
                    warnings.warn("Skipping %s; wrong board size" % file_name)
                except Exception as e:
                    # catch everything else
                    if ignore_errors:
                        warnings.warn("Unkown exception with file %s\n\t%s" % (file_name, e), stacklevel=2)
                    else:
                        # bare raise keeps the original traceback
                        raise
                finally:
                    # pairs written before an error are kept and indexed
                    if n_pairs > 0:
                        # '/' has special meaning in HDF5 key names, so they
                        # are replaced with ':' here
                        file_name_key = file_name.replace('/', ':')
                        file_offsets[file_name_key] = [start_file_index, n_pairs]
                        if verbose:
                            print("\t%d state/action pairs extracted" % n_pairs)
                    elif verbose:
                        print("\t-no usable data-")
        except Exception as e:
            print("sgfs_to_hdf5 failed")
            print(e)
            # release the handle before deleting the file it points at
            h5file.close()
            os.remove(tmp_file)
            raise

        if verbose:
            print("finished. renaming %s to %s" % (tmp_file, hdf5_file))

        # processing complete; rename tmp_file to hdf5_file
        h5file.close()
        os.rename(tmp_file, hdf5_file)
Exemplo n.º 18
0
def play_batch(player_RL, player_SL, batch_size, features):
    """Play a batch of games in parallel and return one training pair
    from each game.

    All games start out being played by player_SL. At a turn index drawn
    uniformly from range(450), every game plays one uniformly random legal
    move; the resulting positions are recorded as training inputs and
    player_RL plays every move after that. Play stops once each game is
    either over or has more than 500 turns played.

    Arguments:
        - player_RL, player_SL : objects providing get_moves(states), which
            must return one move per state
        - batch_size : number of games played in parallel
        - features : feature list handed to Preprocess

    Returns a tuple (X, winners):
        - X : the recorded positions converted by Preprocess and
            concatenated along axis 0, or None if every game finished
            before the random-move turn was reached
        - winners : array of shape (batch_size, 1) holding get_winner() of
            each final state
    """

    def do_move(states, moves):
        """Apply moves[i] to states[i] in place, skipping finished games."""
        for st, mv in zip(states, moves):
            if not st.is_end_of_game:
                # Only do more moves if not end of game already
                st.do_move(mv)
        return states

    def do_rand_move(states, player, player_RL):
        """Do a uniform-random move over legal moves and record info for
        training. Only gets called once per game.

        The `player` argument is ignored: the returned player is always
        player_RL, which takes over from this turn on.
        """
        colors = [st.current_player for st in states]  # Record player color
        legal_moves = [st.get_legal_moves() for st in states]
        rand_moves = [lm[np.random.choice(len(lm))] for lm in legal_moves]
        states = do_move(states, rand_moves)
        # Snapshot the positions now; later moves keep mutating `states`
        X_list = [st.copy() for st in states]  # For later 1hot preprocessing
        return X_list, colors, states, player_RL

    def convert(X_list, preprocessor):
        """Convert states to 1-hot and concatenate. X's are game state objects.
        """
        return np.concatenate(
            [preprocessor.state_to_tensor(X) for X in X_list], axis=0)

    preprocessor = Preprocess(features)
    player = player_SL
    states = [GameState() for _ in range(batch_size)]
    # Randomly choose turn to play uniform random. Move prior will be from SL
    # policy. Moves after will be from RL policy.
    i_rand_move = np.random.choice(range(450))
    X_list = None
    turn = 0
    while True:
        # Each pass plays two turns: first black's move, then white's.
        for _ in range(2):
            if turn == i_rand_move:
                # Make random move, then switch from SL to RL policy
                X_list, _colors, states, player = do_rand_move(
                    states, player, player_RL)
            else:
                # Get moves (batch) and play them
                states = do_move(states, player.get_moves(states))
            turn += 1
        # If all games have ended, we're done. Get winners.
        done = [st.is_end_of_game or st.turns_played > 500 for st in states]
        print(turn)
        if all(done):
            break
    # Concatenate training examples
    X = None
    if X_list is not None:
        X = convert(X_list, preprocessor)
    winners = np.array([st.get_winner() for st in states]).reshape(batch_size, 1)
    return X, winners