def test_get_board(self):
    gs = simple_board()
    pp = Preprocess(["board"])
    feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0))

    white_pos = np.asarray([
        [0, 0, 0, 0, 0, 0, 0],
        [1, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0]])
    black_pos = np.asarray([
        [1, 1, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0],
        [0, 0, 1, 0, 1, 0, 0],
        [0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0]])
    empty_pos = np.ones((gs.size, gs.size)) - (white_pos + black_pos)

    # check number of planes
    self.assertEqual(feature.shape, (gs.size, gs.size, 3))
    # check return value against hand-coded expectation
    # (given that current_player is white)
    self.assertTrue(np.all(feature == np.dstack((white_pos, black_pos, empty_pos))))
def test_get_self_atari_size(self):
    # TODO - at the moment there is no imminent self-atari for white
    gs = simple_board()
    pp = Preprocess(["self_atari_size"])
    feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0))

    self.assertTrue(np.all(feature == np.zeros((gs.size, gs.size, 8))))
def test_get_liberties(self): gs = simple_board() pp = Preprocess(["liberties"]) feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) # todo - test liberties when > 8 one_hot_liberties = np.zeros((gs.size, gs.size, 8)) # black piece at (4,4) has a single liberty: (4,3) one_hot_liberties[4, 4, 0] = 1 # the black group in the top left corner has 2 liberties one_hot_liberties[0, 0:3, 1] = 1 # .. as do the white pieces on the left and right of the eye one_hot_liberties[3, 4, 1] = 1 one_hot_liberties[5, 4, 1] = 1 # the white group in the top left corner has 3 liberties one_hot_liberties[1, 0:2, 2] = 1 # ...as does the white piece at (4,5) one_hot_liberties[4, 5, 2] = 1 # ...and the black pieces on the sides of the eye one_hot_liberties[3, 3, 2] = 1 one_hot_liberties[5, 3, 2] = 1 # the black piece at (4,2) has 4 liberties one_hot_liberties[4, 2, 3] = 1 for i in range(8): self.assertTrue( np.all(feature[:, :, i] == one_hot_liberties[:, :, i]), "bad expectation: stones with %d liberties" % (i + 1))
def test_get_board(self):
    gs = simple_board()
    pp = Preprocess(["board"], size=7)
    feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0))

    white_pos = np.asarray([
        [0, 0, 0, 0, 0, 0, 0],
        [1, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0]])
    black_pos = np.asarray([
        [1, 1, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0],
        [0, 0, 1, 0, 1, 0, 0],
        [0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0]])
    empty_pos = np.ones((gs.get_size(), gs.get_size())) - (white_pos + black_pos)

    # check number of planes
    self.assertEqual(feature.shape, (gs.get_size(), gs.get_size(), 3))
    # check return value against hand-coded expectation
    # (given that current_player is white)
    self.assertTrue(np.all(feature == np.dstack((white_pos, black_pos, empty_pos))))
def test_get_liberties(self): gs = simple_board() pp = Preprocess(["liberties"], size=7) feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) # todo - test liberties when > 8 one_hot_liberties = np.zeros((gs.get_size(), gs.get_size(), 8)) # black piece at (4,4) has a single liberty: (4,3) one_hot_liberties[4, 4, 0] = 1 # the black group in the top left corner has 2 liberties one_hot_liberties[0, 0:3, 1] = 1 # .. as do the white pieces on the left and right of the eye one_hot_liberties[3, 4, 1] = 1 one_hot_liberties[5, 4, 1] = 1 # the white group in the top left corner has 3 liberties one_hot_liberties[1, 0:2, 2] = 1 # ...as does the white piece at (4,5) one_hot_liberties[4, 5, 2] = 1 # ...and the black pieces on the sides of the eye one_hot_liberties[3, 3, 2] = 1 one_hot_liberties[5, 3, 2] = 1 # the black piece at (4,2) has 4 liberties one_hot_liberties[4, 2, 3] = 1 for i in range(8): self.assertTrue( np.all(feature[:, :, i] == one_hot_liberties[:, :, i]), "bad expectation: stones with %d liberties" % (i + 1))
def test_get_sensibleness(self):
    gs, moves = parseboard.parse("x B . . W . . . .|"
                                 "B B W . . W . . .|"
                                 ". W B B W W . . .|"
                                 ". B y B W W . . .|"
                                 ". B B z B W . . .|"
                                 ". . B B B W . . .|"
                                 ". . . . . . . . W|"
                                 ". . . . . . . . W|"
                                 ". . . . . . . W s|")
    gs.set_current_player(go.BLACK)
    pp = Preprocess(["sensibleness"], size=9)
    feature = pp.state_to_tensor(gs)[0, 0]  # 1D tensor; no need to transpose

    expectation = np.zeros((gs.get_size(), gs.get_size()), dtype=int)
    for (x, y) in gs.get_legal_moves():
        expectation[x, y] = 1

    # 'x', 'y', and 'z' are eyes - remove them from 'sensible' moves
    expectation[moves['x']] = 0
    expectation[moves['y']] = 0
    expectation[moves['z']] = 0

    # 's' is suicide - should not be legal
    expectation[moves['s']] = 0

    self.assertTrue(np.all(expectation == feature))
def validate_feature_planes(verbose, dataset, model_features):
    """Verify that dataset's features match the model's expected features.
    """
    if 'features' in dataset:
        dataset_features = dataset['features'][()]
        dataset_features = dataset_features.split(",")
        if len(dataset_features) != len(model_features) or \
           any(df != mf for (df, mf) in zip(dataset_features, model_features)):
            raise ValueError(
                "Model JSON file expects features \n\t%s\n"
                "But dataset contains \n\t%s" % ("\n\t".join(model_features),
                                                 "\n\t".join(dataset_features)))
        elif verbose:
            print("Verified that dataset features and model features exactly match.")
    else:
        # Cannot check each feature, but can check number of planes.
        n_dataset_planes = dataset["states"].shape[1]
        tmp_preprocess = Preprocess(model_features)
        n_model_planes = tmp_preprocess.get_output_dimension()
        if n_dataset_planes != n_model_planes:
            raise ValueError(
                "Model JSON file expects a total of %d planes from features \n\t%s\n"
                "But dataset contains %d planes" % (n_model_planes,
                                                    "\n\t".join(model_features),
                                                    n_dataset_planes))
        elif verbose:
            print("Verified agreement of number of model and dataset feature planes, "
                  "but cannot verify exact match using old dataset format.")
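# A minimal usage sketch of validate_feature_planes. "train.h5" and the
# feature list are hypothetical; any dataset produced by game_converter
# below would be checked the same way.
import h5py

with h5py.File("train.h5", "r") as dataset:
    validate_feature_planes(verbose=True, dataset=dataset,
                            model_features=["board", "liberties", "sensibleness"])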
def test_get_legal(self): gs = simple_board() pp = Preprocess(["legal"]) feature = pp.state_to_tensor(gs)[0, 0] # 1D tensor; no need to transpose expectation = np.zeros((gs.size, gs.size)) for (x, y) in gs.get_legal_moves(): expectation[x, y] = 1 self.assertTrue(np.all(expectation == feature))
def test_get_legal(self): gs = simple_board() pp = Preprocess(["legal"], size=7) feature = pp.state_to_tensor(gs)[0, 0] # 1D tensor; no need to transpose expectation = np.zeros((gs.get_size(), gs.get_size())) for (x, y) in gs.get_legal_moves(): expectation[x, y] = 1 self.assertTrue(np.all(expectation == feature))
def test_get_self_atari_size(self): gs = self_atari_board() pp = Preprocess(["self_atari_size"]) feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) one_hot_self_atari = np.zeros((gs.size, gs.size, 8)) # self atari of size 1 at position 0,0 one_hot_self_atari[0, 0, 0] = 1 # self atari of size 3 at position 3,4 one_hot_self_atari[3, 4, 2] = 1 self.assertTrue(np.all(feature == one_hot_self_atari))
def test_get_sensibleness(self):
    # TODO - there are no legal eyes at the moment
    gs = simple_board()
    pp = Preprocess(["sensibleness"])
    feature = pp.state_to_tensor(gs)[0, 0]  # 1D tensor; no need to transpose

    expectation = np.zeros((gs.size, gs.size))
    for (x, y) in gs.get_legal_moves():
        if not gs.is_eye((x, y), go.WHITE):
            expectation[x, y] = 1

    self.assertTrue(np.all(expectation == feature))
def test_get_self_atari_size(self): gs = self_atari_board() pp = Preprocess(["self_atari_size"], size=7) feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) one_hot_self_atari = np.zeros((gs.get_size(), gs.get_size(), 8)) # self atari of size 1 at position 0,0 one_hot_self_atari[0, 0, 0] = 1 # self atari of size 3 at position 3,4 one_hot_self_atari[3, 4, 2] = 1 self.assertTrue(np.all(feature == one_hot_self_atari))
def test_get_self_atari_size_cap(self): gs = capture_board() pp = Preprocess(["self_atari_size"], size=7) feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) one_hot_self_atari = np.zeros((gs.get_size(), gs.get_size(), 8)) # self atari of size 1 at the ko position and just below it one_hot_self_atari[4, 5, 0] = 1 one_hot_self_atari[3, 6, 0] = 1 # self atari of size 3 at bottom corner one_hot_self_atari[6, 6, 2] = 1 self.assertTrue(np.all(feature == one_hot_self_atari))
def test_get_self_atari_size_cap(self): gs = capture_board() pp = Preprocess(["self_atari_size"]) feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) one_hot_self_atari = np.zeros((gs.size, gs.size, 8)) # self atari of size 1 at the ko position and just below it one_hot_self_atari[4, 5, 0] = 1 one_hot_self_atari[3, 6, 0] = 1 # self atari of size 3 at bottom corner one_hot_self_atari[6, 6, 2] = 1 self.assertTrue(np.all(feature == one_hot_self_atari))
def test_get_ladder_capture(self):
    gs, moves = parseboard.parse(". . . . . . .|"
                                 "B W a . . . .|"
                                 ". B . . . . .|"
                                 ". . . . . . .|"
                                 ". . . . . . .|"
                                 ". . . . . W .|")
    pp = Preprocess(["ladder_capture"], size=7)
    feature = pp.state_to_tensor(gs)[0, 0]  # 1D tensor; no need to transpose

    expectation = np.zeros((gs.get_size(), gs.get_size()))
    expectation[moves['a']] = 1

    self.assertTrue(np.all(expectation == feature))
def test_get_capture_size(self):
    # TODO - at the moment there is no imminent capture
    gs = simple_board()
    pp = Preprocess(["capture_size"])
    feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0))

    one_hot_capture = np.zeros((gs.size, gs.size, 8))
    # there is no capture available; all legal moves are zero-capture
    for (x, y) in gs.get_legal_moves():
        one_hot_capture[x, y, 0] = 1

    for i in range(8):
        self.assertTrue(
            np.all(feature[:, :, i] == one_hot_capture[:, :, i]),
            "bad expectation: capturing %d stones" % i)
def test_get_ladder_escape(self):
    # On this board, playing at 'a' is a ladder escape because there is a
    # breaker on the right.
    gs, moves = parseboard.parse(". B B . . . .|"
                                 "B W a . . . .|"
                                 ". B . . . . .|"
                                 ". . . . . W .|"
                                 ". . . . . . .|"
                                 ". . . . . . .|")
    pp = Preprocess(["ladder_escape"], size=7)
    gs.set_current_player(go.WHITE)
    feature = pp.state_to_tensor(gs)[0, 0]  # 1D tensor; no need to transpose

    expectation = np.zeros((gs.get_size(), gs.get_size()))
    expectation[moves['a']] = 1

    self.assertTrue(np.all(expectation == feature))
def test_get_turns_since(self): gs = simple_board() pp = Preprocess(["turns_since"]) feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) one_hot_turns = np.zeros((gs.size, gs.size, 8)) rev_history = gs.history[::-1] # one plane per move for the last 7 for i in range(7): move = rev_history[i] one_hot_turns[move[0], move[1], i] = 1 # far back plane gets all other moves for move in rev_history[7:]: one_hot_turns[move[0], move[1], 7] = 1 self.assertTrue(np.all(feature == one_hot_turns))
def test_get_turns_since(self): gs = simple_board() pp = Preprocess(["turns_since"]) feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0)) one_hot_turns = np.zeros((gs.size, gs.size, 8)) rev_moves = gs.history[::-1] for x in range(gs.size): for y in range(gs.size): if gs.board[x, y] != go.EMPTY: # find most recent move at x, y age = rev_moves.index((x, y)) one_hot_turns[x, y, min(age, 7)] = 1 self.assertTrue(np.all(feature == one_hot_turns))
def test_get_capture_size(self):
    gs = capture_board()
    pp = Preprocess(["capture_size"])
    feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0))

    score_before = gs.num_white_prisoners
    one_hot_capture = np.zeros((gs.size, gs.size, 8))
    # compute the expected capture size of each legal move from the change
    # in prisoner count after playing it
    for (x, y) in gs.get_legal_moves():
        copy = gs.copy()
        copy.do_move((x, y))
        num_captured = copy.num_white_prisoners - score_before
        one_hot_capture[x, y, min(7, num_captured)] = 1

    for i in range(8):
        self.assertTrue(
            np.all(feature[:, :, i] == one_hot_capture[:, :, i]),
            "bad expectation: capturing %d stones" % i)
def test_get_liberties_after_cap(self):
    """A copy of test_get_liberties_after but where captures are imminent
    """
    gs = capture_board()
    pp = Preprocess(["liberties_after"])
    feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0))

    one_hot_liberties = np.zeros((gs.size, gs.size, 8))

    for (x, y) in gs.get_legal_moves():
        copy = gs.copy()
        copy.do_move((x, y))
        libs = copy.liberty_counts[x, y]
        one_hot_liberties[x, y, min(libs - 1, 7)] = 1

    for i in range(8):
        self.assertTrue(
            np.all(feature[:, :, i] == one_hot_liberties[:, :, i]),
            "bad expectation: stones with %d liberties after move" % (i + 1))
def test_get_liberties_after(self):
    gs = simple_board()
    pp = Preprocess(["liberties_after"])
    feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0))

    one_hot_liberties = np.zeros((gs.size, gs.size, 8))

    # TODO (?) hand-code?
    for (x, y) in gs.get_legal_moves():
        copy = gs.copy()
        copy.do_move((x, y))
        libs = copy.liberty_counts[x, y]
        if libs < 7:
            one_hot_liberties[x, y, libs - 1] = 1
        else:
            one_hot_liberties[x, y, 7] = 1

    for i in range(8):
        self.assertTrue(
            np.all(feature[:, :, i] == one_hot_liberties[:, :, i]),
            "bad expectation: stones with %d liberties after move" % (i + 1))
def test_feature_concatenation(self):
    gs = simple_board()
    pp = Preprocess(["board", "sensibleness", "capture_size"])
    feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0))

    expectation = np.zeros((gs.size, gs.size, 3 + 1 + 8))

    # first three planes: board
    expectation[:, :, 0] = (gs.board == go.WHITE) * 1
    expectation[:, :, 1] = (gs.board == go.BLACK) * 1
    expectation[:, :, 2] = (gs.board == go.EMPTY) * 1

    # 4th plane: sensibleness (as in test_get_sensibleness)
    for (x, y) in gs.get_legal_moves():
        if not gs.is_eye((x, y), go.WHITE):
            expectation[x, y, 3] = 1

    # 5th through 12th plane: capture size (all zero-capture)
    for (x, y) in gs.get_legal_moves():
        expectation[x, y, 4] = 1

    self.assertTrue(np.all(expectation == feature))
def test_two_escapes(self):
    gs, moves = parseboard.parse(". . X . . .|"
                                 ". X O a . .|"
                                 ". X c X . .|"
                                 ". O X b . .|"
                                 ". . O . . .|"
                                 ". . . . . .|")

    # place a white stone at c, and reset the current player to white
    gs.do_move(moves['c'], color=go.WHITE)
    gs.set_current_player(go.WHITE)

    pp = Preprocess(["ladder_escape"], size=6)
    feature = pp.state_to_tensor(gs)[0, 0]  # 1D tensor; no need to transpose

    # both 'a' and 'b' should be considered escape moves for white after 'O' at c
    expectation = np.zeros((gs.get_size(), gs.get_size()))
    expectation[moves['a']] = 1
    expectation[moves['b']] = 1

    self.assertTrue(np.all(expectation == feature))
def is_ladder_capture(state, move):
    pp = Preprocess(["ladder_capture"], size=state.get_size())
    feature = pp.state_to_tensor(state).squeeze()
    return feature[move] == 1
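# A minimal usage sketch of is_ladder_capture, reusing the parseboard helper
# from the tests above (the position mirrors test_get_ladder_capture):
gs, moves = parseboard.parse(". . . . . . .|"
                             "B W a . . . .|"
                             ". B . . . . .|"
                             ". . . . . . .|"
                             ". . . . . . .|"
                             ". . . . . W .|")
assert is_ladder_capture(gs, moves['a'])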
class CNNPolicy(object):
    """uses a convolutional neural network to evaluate the state of the game
    and compute a probability distribution over the next action
    """

    def __init__(self, feature_list, **kwargs):
        """create a policy object that preprocesses according to feature_list and
        uses a neural network specified by keyword arguments (see create_network())
        """
        self.preprocessor = Preprocess(feature_list)
        kwargs["input_dim"] = self.preprocessor.output_dim
        self.model = CNNPolicy.create_network(**kwargs)
        self.forward = self._model_forward()

    def _model_forward(self):
        """Construct a function using the current keras backend that, when given
        a batch of inputs, simply processes them forward and returns the output

        This is as opposed to model.compile(), which takes a loss function and
        training method.

        c.f. https://github.com/fchollet/keras/issues/1426
        """
        model_input = self.model.get_input(train=False)
        model_output = self.model.get_output(train=False)
        forward_function = K.function([model_input], [model_output])

        # the forward_function returns a list of tensors
        # the first [0] gets the front tensor.
        # this tensor, however, has dimensions (1, width, height)
        # and we just want (width, height) hence the second [0]
        return lambda inpt: forward_function(inpt)[0][0]

    def batch_eval_state(self, state_gen, batch=16):
        """Given a stream of states in state_gen, evaluates them in batches
        to make best use of GPU resources.

        Returns: TBD (stream of results? that would break zip().
        streaming pairs of pre-zipped (state, result)?)
        """
        raise NotImplementedError()

    def eval_state(self, state):
        """Given a GameState object, returns a list of (action, probability) pairs
        according to the network outputs
        """
        tensor = self.preprocessor.state_to_tensor(state)

        # run the tensor through the network
        network_output = self.forward([tensor])

        # get network activations at legal move locations
        # note: may not be a proper distribution by ignoring illegal moves
        return [((x, y), network_output[x, y]) for (x, y) in state.get_legal_moves()]

    @staticmethod
    def create_network(**kwargs):
        """construct a convolutional neural network.

        Keyword Arguments:
        - input_dim:         depth of features to be processed by first layer (no default)
        - board:             width of the go board to be processed (default 19)
        - filters_per_layer: number of filters used on every layer (default 128)
        - layers:            number of convolutional steps (default 12)
        - filter_width_K:    (where K is between 1 and <layers>) width of filter on
                             layer K (default 3 except 1st layer which defaults to 5).
                             Must be odd.
        """
        defaults = {
            "board": 19,
            "filters_per_layer": 128,
            "layers": 12,
            "filter_width_1": 5
        }
        # copy defaults, but override with anything in kwargs
        params = defaults
        params.update(kwargs)

        # create the network:
        # a series of zero-paddings followed by convolutions
        # such that the output dimensions are also board x board
        network = Sequential()

        # create first layer
        network.add(convolutional.Convolution2D(
            input_shape=(params["input_dim"], params["board"], params["board"]),
            nb_filter=params["filters_per_layer"],
            nb_row=params["filter_width_1"],
            nb_col=params["filter_width_1"],
            init='uniform',
            activation='relu',
            border_mode='same'))

        # create all other layers
        for i in range(2, params["layers"] + 1):
            # use filter_width_K if it is there, otherwise use 3
            filter_key = "filter_width_%d" % i
            filter_width = params.get(filter_key, 3)
            network.add(convolutional.Convolution2D(
                nb_filter=params["filters_per_layer"],
                nb_row=filter_width,
                nb_col=filter_width,
                init='uniform',
                activation='relu',
                border_mode='same'))

        # the last layer maps each <filters_per_layer> feature to a number
        network.add(convolutional.Convolution2D(
            nb_filter=1,
            nb_row=1,
            nb_col=1,
            init='uniform',
            border_mode='same'))
        # reshape output to be board x board
        network.add(Reshape((params["board"], params["board"])))
        # softmax makes it into a probability distribution
        network.add(Activation('softmax'))

        return network

    @staticmethod
    def load_model(json_file):
        """create a new CNNPolicy object from the architecture specified in json_file
        """
        with open(json_file, 'r') as f:
            object_specs = json.load(f)
        new_policy = CNNPolicy(object_specs['feature_list'])
        new_policy.model = model_from_json(object_specs['keras_model'])
        new_policy.forward = new_policy._model_forward()
        return new_policy

    def save_model(self, json_file):
        """write the network model and preprocessing features to the specified file
        """
        # this looks odd because we are serializing a model with json as a string
        # then making that the value of an object which is then serialized as
        # json again.
        # It's not as crazy as it looks. A CNNPolicy has 2 moving parts - the
        # feature preprocessing and the neural net, each of which gets a top-level
        # entry in the saved file. Keras just happens to serialize models with JSON
        # as well. Note how this format makes load_model fairly clean as well.
        object_specs = {
            'keras_model': self.model.to_json(),
            'feature_list': self.preprocessor.feature_list
        }
        # use the json module to write object_specs to file
        with open(json_file, 'w') as f:
            json.dump(object_specs, f)
def play_batch(player_RL, player_SL, batch_size, features,
               i_rand_move, next_idx, sgf_path):
    """Play a batch of games in parallel and return one training pair
    from each game.

    As described in Silver et al, the method for generating value net
    training data is as follows:

    * pick a number between 1 and 450
    * use the supervised-learning policy to play a game against itself up to
      that number of moves.
    * now go off-policy and pick a totally random move
    * play out the rest of the game with the reinforcement-learning policy
    * save the state that occurred *right after* the random move,
    * and the end result of the game, as the training pair
    """

    def do_move(states, moves):
        for st, mv in zip(states, moves):
            if not st.is_end_of_game():
                # Only do more moves if not end of game already
                st.do_move(mv)
        return states

    def do_rand_move(states):
        """Do a uniform-random move over legal moves and record info for
        training. Only gets called once per game.
        """
        # get legal moves and play one at random
        legal_moves = [st.get_legal_moves() for st in states]
        rand_moves = [lm[np.random.choice(len(lm))] for lm in legal_moves]
        states = do_move(states, rand_moves)
        # copy all states; these are the generated training data
        training_state_list = [st.copy() for st in states]  # for later 1-hot preprocessing
        return training_state_list, states

    def convert(state_list, preprocessor):
        """Convert states to 1-hot and concatenate. X's are game state objects.
        """
        states = np.concatenate(
            [preprocessor.state_to_tensor(state) for state in state_list],
            axis=0)
        return states

    # Lists of game training pairs (1-hot)
    preprocessor = Preprocess(features)
    states = [GameState() for _ in xrange(batch_size)]

    # play player_SL moves
    for _ in xrange(i_rand_move - 1):
        # Get moves (batch)
        batch_moves = player_SL.get_moves(states)
        # Do moves (black)
        states = do_move(states, batch_moves)

    # remove games that are finished
    states = [state for state in states if not state.is_end_of_game()]

    # Make random move
    states_list, states = do_rand_move(states)

    # color is the random-move player's color
    color = WHITE if i_rand_move % 2 == 0 else BLACK

    # play moves with player_RL till game ends
    while True:
        # Get moves (batch)
        batch_moves = player_RL.get_moves(states)
        # Do moves (black)
        states = do_move(states, batch_moves)
        # check if all games are finished
        done = [st.is_end_of_game() for st in states]
        if all(done):
            break

    if sgf_path is not None:
        # number the different sgf files
        sgf_id = next_idx
        for gm in states:
            # pad the file name with leading '0's
            file_name = str(sgf_id)
            while len(file_name) < 10:
                file_name = '0' + file_name
            # determine winner
            winner_game = 'WHITE' if gm.get_winner_color() == WHITE else 'BLACK'
            random_player = 'WHITE' if color == WHITE else 'BLACK'
            # generate file name
            file_name += '_winner_' + winner_game + '_active-player_' + \
                         random_player + '_move_' + str(i_rand_move) + '.sgf'
            # save sgf
            save_gamestate_to_sgf(gm, sgf_path, file_name,
                                  result=winner_game + ' ' + str(i_rand_move))
            # increment sgf id count
            sgf_id += 1

    # Concatenate training examples
    training_states = convert(states_list, preprocessor)

    # get winners list relative to 'random move' player color (color)
    # winner BLACK & color BLACK -> WIN
    # winner WHITE & color WHITE -> WIN
    # winner BLACK & color WHITE -> LOSE
    # winner WHITE & color BLACK -> LOSE
    actual_batch_size = len(states)
    winners = np.array([WIN if st.get_winner_color() == color else LOSE
                        for st in states]).reshape(actual_batch_size, 1)

    return training_states, winners
class game_converter:

    def __init__(self, features):
        self.feature_processor = Preprocess(features)
        self.n_features = self.feature_processor.output_dim

    def convert_game(self, file_name, bd_size):
        """Read the given SGF file into an iterable of (input, output) pairs
        for neural network training

        Each input is a GameState converted into one-hot neural net features
        Each output is an action as an (x,y) pair (passes are skipped)

        If this game's size does not match bd_size, a SizeMismatchError is raised
        """
        with open(file_name, 'r') as file_object:
            state_action_iterator = sgf_iter_states(file_object.read(), include_end=False)

        for (state, move, player) in state_action_iterator:
            if state.size != bd_size:
                raise SizeMismatchError()
            if move != go.PASS_MOVE:
                nn_input = self.feature_processor.state_to_tensor(state)
                yield (nn_input, move)

    def sgfs_to_hdf5(self, sgf_files, hdf5_file, bd_size=19, ignore_errors=True, verbose=False):
        """Convert all files in the iterable sgf_files into an hdf5 group to be
        stored in hdf5_file

        Arguments:
        - sgf_files : an iterable of relative or absolute paths to SGF files
        - hdf5_file : the name of the HDF5 where features will be saved
        - bd_size : side length of board of games that are loaded
        - ignore_errors : if True, issues a Warning when there is an unknown
          exception rather than halting. Note that sgf.ParseException and
          go.IllegalMove exceptions are always skipped

        The resulting file has the following properties:
            states  : dataset with shape (n_data, n_features, board width, board height)
            actions : dataset with shape (n_data, 2) (actions are stored as x,y tuples
                      of where the move was played)
            file_offsets : group mapping from filenames to tuples of (index, length)

        For example, to find what positions in the dataset come from 'test.sgf':
            index, length = file_offsets['test.sgf']
            test_states = states[index:index+length]
            test_actions = actions[index:index+length]
        """
        # TODO - also save feature list

        # make a hidden temporary file in case of a crash.
        # on success, this is renamed to hdf5_file
        tmp_file = os.path.join(os.path.dirname(hdf5_file),
                                ".tmp." + os.path.basename(hdf5_file))
        h5f = h5.File(tmp_file, 'w')

        try:
            # see http://docs.h5py.org/en/latest/high/group.html#Group.create_dataset
            states = h5f.require_dataset(
                'states',
                dtype=np.uint8,
                shape=(1, self.n_features, bd_size, bd_size),
                # 'None' dimension allows it to grow arbitrarily
                maxshape=(None, self.n_features, bd_size, bd_size),
                # allow non-uint8 datasets to be loaded, coerced to uint8
                exact=False,
                # approximately 1MB chunks
                chunks=(64, self.n_features, bd_size, bd_size),
                compression="lzf")
            actions = h5f.require_dataset(
                'actions',
                dtype=np.uint8,
                shape=(1, 2),
                maxshape=(None, 2),
                exact=False,
                chunks=(1024, 2),
                compression="lzf")

            # 'file_offsets' is an HDF5 group so that 'file_name in file_offsets' is fast
            file_offsets = h5f.require_group('file_offsets')

            if verbose:
                print("created HDF5 dataset in {}".format(tmp_file))

            next_idx = 0
            for file_name in sgf_files:
                if verbose:
                    print(file_name)
                # count number of state/action pairs yielded by this game
                n_pairs = 0
                file_start_idx = next_idx
                try:
                    for state, move in self.convert_game(file_name, bd_size):
                        if next_idx >= len(states):
                            states.resize((next_idx + 1, self.n_features, bd_size, bd_size))
                            actions.resize((next_idx + 1, 2))
                        states[next_idx] = state
                        actions[next_idx] = move
                        n_pairs += 1
                        next_idx += 1
                except go.IllegalMove:
                    warnings.warn("Illegal Move encountered in %s\n"
                                  "\tdropping the remainder of the game" % file_name)
                except sgf.ParseException:
                    warnings.warn("Could not parse %s\n\tdropping game" % file_name)
                except SizeMismatchError:
                    warnings.warn("Skipping %s; wrong board size" % file_name)
                except Exception as e:
                    # catch everything else
                    if ignore_errors:
                        warnings.warn("Unknown exception with file %s\n\t%s" %
                                      (file_name, e), stacklevel=2)
                    else:
                        raise e
                finally:
                    if n_pairs > 0:
                        # '/' has special meaning in HDF5 key names, so they are
                        # replaced with ':' here
                        file_name_key = file_name.replace('/', ':')
                        file_offsets[file_name_key] = [file_start_idx, n_pairs]
                        if verbose:
                            print("\t%d state/action pairs extracted" % n_pairs)
                    elif verbose:
                        print("\t-no usable data-")
        except Exception as e:
            print("sgfs_to_hdf5 failed")
            os.remove(tmp_file)
            raise e

        if verbose:
            print("finished. renaming %s to %s" % (tmp_file, hdf5_file))

        # processing complete; rename tmp_file to hdf5_file
        h5f.close()
        os.rename(tmp_file, hdf5_file)
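# Sketch of reading back a converted dataset, following the file_offsets layout
# documented in sgfs_to_hdf5 ("features.h5" and "test.sgf" are hypothetical
# names; remember that '/' in stored file names is replaced by ':'):
import h5py

with h5py.File("features.h5", "r") as f:
    index, length = f["file_offsets"]["test.sgf"][()]
    test_states = f["states"][index:index + length]
    test_actions = f["actions"][index:index + length]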
def make_training_pairs(player, opp, features, mini_batch_size, board_size=19):
    """Make training pairs for batch of matches, utilizing player.get_moves
    (parallel form of player.get_move), which calls `CNNPolicy.batch_eval_state`.

    Args:
    player -- player that we're always updating
    opp -- batch opponent
    feature_list -- game features to be one-hot encoded
    mini_batch_size -- number of games in mini-batch

    Return:
    X_list -- list of 1-hot board states associated with moves.
    y_list -- list of 1-hot moves associated with board states.
    winners -- list of winners associated with each game in batch
    """

    def do_move(states, states_prev, moves, X_list, y_list, player_color):
        bsize_flat = bsize * bsize
        for st, st_prev, mv, X, y in zip(states, states_prev, moves, X_list, y_list):
            if not st.is_end_of_game:
                # Only do more moves if not end of game already
                st.do_move(mv)
                if st.current_player != player_color and mv is not go.PASS_MOVE:
                    # Convert move to one-hot
                    state_1hot = preprocessor.state_to_tensor(st_prev)
                    move_1hot = np.zeros(bsize_flat)
                    move_1hot[flatten_idx(mv, bsize)] = 1
                    X.append(state_1hot)
                    y.append(move_1hot)
        return states, X_list, y_list

    # Lists of game training pairs (1-hot)
    X_list = [list() for _ in xrange(mini_batch_size)]
    y_list = [list() for _ in xrange(mini_batch_size)]
    preprocessor = Preprocess(features)
    bsize = player.policy.model.input_shape[-1]
    states = [GameState(size=board_size) for i in xrange(mini_batch_size)]

    # Randomly choose who goes first (i.e. color of 'player')
    player_color = np.random.choice([go.BLACK, go.WHITE])
    player1, player2 = (player, opp) if player_color == go.BLACK else (opp, player)

    while True:
        # Cache states before moves
        states_prev = [st.copy() for st in states]
        # Get moves (batch)
        moves_black = player1.get_moves(states)
        # Do moves (black)
        states, X_list, y_list = do_move(states, states_prev, moves_black,
                                         X_list, y_list, player_color)
        # Do moves (white)
        moves_white = player2.get_moves(states)
        states, X_list, y_list = do_move(states, states_prev, moves_white,
                                         X_list, y_list, player_color)
        # If all games have ended, we're done. Get winners.
        done = [st.is_end_of_game for st in states]
        if all(done):
            break
    winners = [st.get_winner() for st in states]

    # Concatenate tensors across turns within each game
    for i in xrange(mini_batch_size):
        X_list[i] = np.concatenate(X_list[i], axis=0)
        y_list[i] = np.vstack(y_list[i])

    return X_list, y_list, winners
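# Sketch of driving make_training_pairs for one mini-batch of self-play.
# 'PolicyPlayer' is a hypothetical wrapper exposing get_moves(states) and a
# .policy attribute, the only interface make_training_pairs relies on; the
# model file names and feature list are illustrative only.
player = PolicyPlayer(CNNPolicy.load_model("player.json"))
opponent = PolicyPlayer(CNNPolicy.load_model("opponent.json"))
X_list, y_list, winners = make_training_pairs(
    player, opponent, ["board", "liberties", "sensibleness"], mini_batch_size=20)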
def run_training(cmd_line_args=None):
    """Run training. command-line args may be passed in as a list
    """
    import argparse
    parser = argparse.ArgumentParser(description='Perform supervised training on a policy network.')
    # required args
    parser.add_argument("model", help="Path to a JSON model file (i.e. from CNNPolicy.save_model())")  # noqa: E501
    parser.add_argument("train_data", help="A .h5 file of training data")
    parser.add_argument("out_directory", help="directory where metadata and weights will be saved")  # noqa: E501
    # frequently used args
    parser.add_argument("--minibatch", "-B", help="Size of training data minibatches. Default: 16", type=int, default=16)  # noqa: E501
    parser.add_argument("--epochs", "-E", help="Total number of iterations on the data. Default: 10", type=int, default=10)  # noqa: E501
    parser.add_argument("--epoch-length", "-l", help="Number of training examples considered 'one epoch'. Default: # training data", type=int, default=None)  # noqa: E501
    parser.add_argument("--learning-rate", "-r", help="Learning rate - how quickly the model learns at first. Default: .03", type=float, default=.03)  # noqa: E501
    parser.add_argument("--decay", "-d", help="The rate at which learning decreases. Default: .0001", type=float, default=.0001)  # noqa: E501
    parser.add_argument("--verbose", "-v", help="Turn on verbose mode", default=False, action="store_true")  # noqa: E501
    # slightly fancier args
    parser.add_argument("--weights", help="Name of a .h5 weights file (in the output directory) to load to resume training", default=None)  # noqa: E501
    parser.add_argument("--train-val-test", help="Fraction of data to use for training/val/test. Must sum to 1. Invalid if restarting training", nargs=3, type=float, default=[0.93, .05, .02])  # noqa: E501
    parser.add_argument("--symmetries", help="Comma-separated list of transforms, subset of noop,rot90,rot180,rot270,fliplr,flipud,diag1,diag2", default='noop,rot90,rot180,rot270,fliplr,flipud,diag1,diag2')  # noqa: E501
    # TODO - an argument to specify which transformations to use, put it in metadata

    if cmd_line_args is None:
        args = parser.parse_args()
    else:
        args = parser.parse_args(cmd_line_args)

    # TODO - what follows here should be refactored into a series of small functions

    resume = args.weights is not None

    if args.verbose:
        if resume:
            print("trying to resume from %s with weights %s" %
                  (args.out_directory, os.path.join(args.out_directory, args.weights)))
        else:
            if os.path.exists(args.out_directory):
                print("directory %s exists. any previous data will be overwritten" %
                      args.out_directory)
            else:
                print("starting fresh output directory %s" % args.out_directory)

    # load model from json spec
    policy = CNNPolicy.load_model(args.model)
    model_features = policy.preprocessor.feature_list
    model = policy.model
    if resume:
        model.load_weights(os.path.join(args.out_directory, args.weights))

    # features of training data
    dataset = h5.File(args.train_data)

    # Verify that dataset's features match the model's expected features.
    if 'features' in dataset:
        dataset_features = dataset['features'][()]
        dataset_features = dataset_features.split(",")
        if len(dataset_features) != len(model_features) or \
           any(df != mf for (df, mf) in zip(dataset_features, model_features)):
            raise ValueError("Model JSON file expects features \n\t%s\n"
                             "But dataset contains \n\t%s" %
                             ("\n\t".join(model_features), "\n\t".join(dataset_features)))
        elif args.verbose:
            print("Verified that dataset features and model features exactly match.")
    else:
        # Cannot check each feature, but can check number of planes.
        n_dataset_planes = dataset["states"].shape[1]
        tmp_preprocess = Preprocess(model_features)
        n_model_planes = tmp_preprocess.output_dim
        if n_dataset_planes != n_model_planes:
            raise ValueError("Model JSON file expects a total of %d planes from features \n\t%s\n"
                             "But dataset contains %d planes" %
                             (n_model_planes, "\n\t".join(model_features), n_dataset_planes))
        elif args.verbose:
            print("Verified agreement of number of model and dataset feature planes, but cannot "
                  "verify exact match using old dataset format.")

    n_total_data = len(dataset["states"])
    n_train_data = int(args.train_val_test[0] * n_total_data)
    # Need to make sure training data is divisible by minibatch size or get
    # warning mentioning accuracy from keras
    n_train_data = n_train_data - (n_train_data % args.minibatch)
    n_val_data = n_total_data - n_train_data
    # n_test_data = n_total_data - (n_train_data + n_val_data)

    if args.verbose:
        print("dataset loaded")
        print("\t%d total samples" % n_total_data)
        print("\t%d training samples" % n_train_data)
        print("\t%d validation samples" % n_val_data)

    # ensure output directory is available
    if not os.path.exists(args.out_directory):
        os.makedirs(args.out_directory)

    # create metadata file and the callback object that will write to it
    meta_file = os.path.join(args.out_directory, "metadata.json")
    meta_writer = MetadataWriterCallback(meta_file)
    # load prior data if it already exists
    if os.path.exists(meta_file) and resume:
        with open(meta_file, "r") as f:
            meta_writer.metadata = json.load(f)
        if args.verbose:
            print("previous metadata loaded: %d epochs. new epochs will be appended." %
                  len(meta_writer.metadata["epochs"]))
    elif args.verbose:
        print("starting with empty metadata")
    # the MetadataWriterCallback only sets 'epoch' and 'best_epoch'. We can add
    # in anything else we like here
    #
    # TODO - model and train_data are saved in meta_file; check that they match
    # (and make args optional when restarting?)
    meta_writer.metadata["training_data"] = args.train_data
    meta_writer.metadata["model_file"] = args.model
    # Record all command line args in a list so that all args are recorded even
    # when training is stopped and resumed.
    meta_writer.metadata["cmd_line_args"] = meta_writer.metadata.get("cmd_line_args", [])
    meta_writer.metadata["cmd_line_args"].append(vars(args))

    # create ModelCheckpoint to save weights every epoch
    checkpoint_template = os.path.join(args.out_directory, "weights.{epoch:05d}.hdf5")
    checkpointer = ModelCheckpoint(checkpoint_template)

    # load precomputed random-shuffle indices or create them
    # TODO - save each train/val/test indices separately so there's no danger of
    # changing args.train_val_test when resuming
    shuffle_file = os.path.join(args.out_directory, "shuffle.npz")
    if os.path.exists(shuffle_file) and resume:
        with open(shuffle_file, "r") as f:
            shuffle_indices = np.load(f)
        if args.verbose:
            print("loading previous data shuffling indices")
    else:
        # create shuffled indices
        shuffle_indices = np.random.permutation(n_total_data)
        with open(shuffle_file, "w") as f:
            np.save(f, shuffle_indices)
        if args.verbose:
            print("created new data shuffling indices")
    # training indices are the first consecutive set of shuffled indices, val
    # next, then test gets the remainder
    train_indices = shuffle_indices[0:n_train_data]
    val_indices = shuffle_indices[n_train_data:n_train_data + n_val_data]
    # test_indices = shuffle_indices[n_train_data + n_val_data:]

    symmetries = [BOARD_TRANSFORMATIONS[name]
                  for name in args.symmetries.strip().split(",")]

    # create dataset generators
    train_data_generator = shuffled_hdf5_batch_generator(
        dataset["states"], dataset["actions"], train_indices, args.minibatch, symmetries)
    val_data_generator = shuffled_hdf5_batch_generator(
        dataset["states"], dataset["actions"], val_indices, args.minibatch, symmetries)

    sgd = SGD(lr=args.learning_rate, decay=args.decay)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=["accuracy"])

    samples_per_epoch = args.epoch_length or n_train_data

    if args.verbose:
        print("STARTING TRAINING")

    model.fit_generator(
        generator=train_data_generator,
        samples_per_epoch=samples_per_epoch,
        nb_epoch=args.epochs,
        callbacks=[checkpointer, meta_writer],
        validation_data=val_data_generator,
        nb_val_samples=n_val_data)
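# Example invocation of run_training with an explicit argument list (a sketch;
# the model, dataset, and output paths are hypothetical):
run_training(["my_policy.json", "train.h5", "training_output/",
              "--epochs", "20", "--minibatch", "32", "--verbose"])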
class NeuralNetBase(object):
    """Base class for neural network classes handling feature processing,
    construction of a 'forward' function, etc.
    """

    # keep track of subclasses to make generic saving/loading cleaner.
    # subclasses can be 'registered' with the @neuralnet decorator
    subclasses = {}

    def __init__(self, feature_list, **kwargs):
        """create a neural net object that preprocesses according to feature_list
        and uses a neural network specified by keyword arguments (using subclass'
        create_network())

        optional argument: init_network (boolean). If set to False, skips
        initializing self.model and self.forward and the calling function
        should set them.
        """
        defaults = {"board": 19}
        defaults.update(kwargs)
        self.preprocessor = Preprocess(feature_list, size=defaults["board"])
        kwargs["input_dim"] = self.preprocessor.get_output_dimension()

        if kwargs.get('init_network', True):
            # self.__class__ refers to the subclass so that subclasses only
            # need to override create_network()
            self.model = self.__class__.create_network(**kwargs)
            # self.forward is a lambda function wrapping a Keras function
            self.forward = self._model_forward()

    def _model_forward(self):
        """Construct a function using the current keras backend that, when given
        a batch of inputs, simply processes them forward and returns the output

        This is as opposed to model.compile(), which takes a loss function and
        training method.

        c.f. https://github.com/fchollet/keras/issues/1426
        """
        # The uses_learning_phase property is True if the model contains layers
        # that behave differently during training and testing, e.g. Dropout or
        # BatchNormalization. In these cases, K.learning_phase() is a reference
        # to a backend variable that should be set to 0 when using the network
        # in prediction mode and is automatically set to 1 during training.
        if self.model.uses_learning_phase:
            forward_function = K.function([self.model.input, K.learning_phase()],
                                          [self.model.output])
            # the forward_function returns a list of tensors
            # the first [0] gets the front tensor.
            return lambda inpt: forward_function([inpt, 0])[0]
        else:
            # identical but without a second input argument for the learning phase
            forward_function = K.function([self.model.input], [self.model.output])
            return lambda inpt: forward_function([inpt])[0]

    @staticmethod
    def load_model(json_file):
        """create a new neural net object from the architecture specified in json_file
        """
        with open(json_file, 'r') as f:
            object_specs = json.load(f)

        # Create object; may be a subclass of networks saved in specs['class']
        class_name = object_specs.get('class', 'CNNPolicy')
        try:
            network_class = NeuralNetBase.subclasses[class_name]
        except KeyError:
            raise ValueError("Unknown neural network type in json file: {}\n"
                             "(was it registered with the @neuralnet decorator?)"
                             .format(class_name))

        # create new object
        new_net = network_class(object_specs['feature_list'], init_network=False)

        new_net.model = model_from_json(object_specs['keras_model'],
                                        custom_objects={'Bias': Bias})
        if 'weights_file' in object_specs:
            new_net.model.load_weights(object_specs['weights_file'])
        new_net.forward = new_net._model_forward()
        return new_net

    def save_model(self, json_file, weights_file=None):
        """write the network model and preprocessing features to the specified file

        If a weights_file (.hdf5 extension) is also specified, model weights are
        also saved to that file and will be reloaded automatically in a call to
        load_model
        """
        # this looks odd because we are serializing a model with json as a string
        # then making that the value of an object which is then serialized as
        # json again.
        # It's not as crazy as it looks. A Network has 2 moving parts - the
        # feature preprocessing and the neural net, each of which gets a top-level
        # entry in the saved file. Keras just happens to serialize models with JSON
        # as well. Note how this format makes load_model fairly clean as well.
        object_specs = {
            'class': self.__class__.__name__,
            'keras_model': self.model.to_json(),
            'feature_list': self.preprocessor.get_feature_list()
        }
        if weights_file is not None:
            self.model.save_weights(weights_file)
            object_specs['weights_file'] = weights_file
        # use the json module to write object_specs to file
        with open(json_file, 'w') as f:
            json.dump(object_specs, f)
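# A minimal sketch of the @neuralnet decorator referenced above (assumed
# implementation; the real one may differ): it registers each subclass in
# NeuralNetBase.subclasses so that load_model can reconstruct objects from
# the 'class' name saved in the JSON file.
def neuralnet(cls):
    NeuralNetBase.subclasses[cls.__name__] = cls
    return cls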
def generate_data(player_RL, player_SL, hdf5_file, n_training_pairs,
                  batch_size, bd_size, features, verbose, sgf_path):
    # used features
    n_features = Preprocess(features).get_output_dimension()

    # temporary hdf5 file
    tmp_file = os.path.join(os.path.dirname(hdf5_file),
                            ".tmp." + os.path.basename(hdf5_file))

    # open hdf5 file
    h5f = h5py.File(tmp_file, 'w')

    # initialize a new hdf5 file
    h5_states, h5_winners = init_hdf5(h5f, n_features, bd_size)

    # random move distribution administration
    distribution = {key: 0 for key in range(DEAULT_RANDOM_MOVE)}

    if verbose:
        print(str(hdf5_file) + " file initialized.")

    max_value = str(n_training_pairs)
    next_idx = 0
    while True:
        # Randomly choose turn to play uniform random. Move prior will be from SL
        # policy. Moves after will be from RL policy.
        i_rand_move = np.random.choice(range(DEAULT_RANDOM_MOVE))

        # play games
        states, winners = play_batch(player_RL, player_SL, batch_size, features,
                                     i_rand_move, next_idx, sgf_path)

        if states is not None:
            try:
                # get actual batch size in case any pair was removed
                actual_batch_size = len(states)

                # increment random distribution
                distribution[i_rand_move] += actual_batch_size

                # add states and winners to hdf5 file
                h5_states.resize((next_idx + actual_batch_size, n_features, bd_size, bd_size))
                h5_winners.resize((next_idx + actual_batch_size, 1))
                h5_states[next_idx:] = states
                h5_winners[next_idx:] = winners

                # count saved pairs
                next_idx += actual_batch_size
            except Exception as e:
                warnings.warn("Unknown error occurred during batch save to "
                              "HDF5 file: {}".format(hdf5_file))
                raise e

        if verbose:
            # primitive progress indication
            current = str(next_idx)
            while len(current) < len(max_value):
                current = ' ' + current
            line = 'Progress: ' + current + '/' + max_value
            sys.stdout.write('\b' * len(line))
            sys.stdout.write('\r')
            sys.stdout.write(line)
            sys.stdout.flush()

        # stop data generation when at least n_training_pairs have been created
        if n_training_pairs <= next_idx:
            break

    # processing complete: rename tmp_file to hdf5_file
    h5f.close()
    os.rename(tmp_file, hdf5_file)

    if verbose:
        print("Value training data successfully created.")

        # show random move distribution
        print("\nRandom move distribution:")
        for key in range(DEAULT_RANDOM_MOVE):
            print("Random move: " + str(key) + " " + str(distribution[key]))
class CNNPolicy(object):
    """uses a convolutional neural network to evaluate the state of the game
    and compute a probability distribution over the next action
    """

    def __init__(self, feature_list, **kwargs):
        """create a policy object that preprocesses according to feature_list and
        uses a neural network specified by keyword arguments (see create_network())
        """
        self.preprocessor = Preprocess(feature_list)
        kwargs["input_dim"] = self.preprocessor.output_dim
        self.model = CNNPolicy.create_network(**kwargs)
        self.forward = self._model_forward()

    def _model_forward(self):
        """Construct a function using the current keras backend that, when given
        a batch of inputs, simply processes them forward and returns the output

        The output has size (batch x 361) for 19x19 boards (i.e. the output is a
        batch of distributions over flattened boards. See AlphaGo.util#flatten_idx)

        This is as opposed to model.compile(), which takes a loss function and
        training method.

        c.f. https://github.com/fchollet/keras/issues/1426
        """
        forward_function = K.function([self.model.input], [self.model.output])

        # the forward_function returns a list of tensors
        # the first [0] gets the front tensor.
        return lambda inpt: forward_function([inpt])[0]

    def batch_eval_state(self, state_gen, batch=16):
        """Given a stream of states in state_gen, evaluates them in batches
        to make best use of GPU resources.

        Returns: TBD (stream of results? that would break zip().
        streaming pairs of pre-zipped (state, result)?)
        """
        raise NotImplementedError()

    def eval_state(self, state, moves=None):
        """Given a GameState object, returns a list of (action, probability) pairs
        according to the network outputs

        If a list of moves is specified, only those moves are kept in the
        distribution
        """
        tensor = self.preprocessor.state_to_tensor(state)

        # run the tensor through the network
        network_output = self.forward(tensor)

        moves = moves or state.get_legal_moves()
        move_indices = [flatten_idx(m, state.size) for m in moves]

        # get network activations at legal move locations
        # note: may not be a proper distribution by ignoring illegal moves
        distribution = network_output[0][move_indices]
        distribution = distribution / distribution.sum()

        return zip(moves, distribution)

    @staticmethod
    def create_network(**kwargs):
        """construct a convolutional neural network.

        Keyword Arguments:
        - input_dim:         depth of features to be processed by first layer (no default)
        - board:             width of the go board to be processed (default 19)
        - filters_per_layer: number of filters used on every layer (default 128)
        - layers:            number of convolutional steps (default 12)
        - filter_width_K:    (where K is between 1 and <layers>) width of filter on
                             layer K (default 3 except 1st layer which defaults to 5).
                             Must be odd.
        """
        defaults = {
            "board": 19,
            "filters_per_layer": 128,
            "layers": 12,
            "filter_width_1": 5
        }
        # copy defaults, but override with anything in kwargs
        params = defaults
        params.update(kwargs)

        # create the network:
        # a series of zero-paddings followed by convolutions
        # such that the output dimensions are also board x board
        network = Sequential()

        # create first layer
        network.add(convolutional.Convolution2D(
            input_shape=(params["input_dim"], params["board"], params["board"]),
            nb_filter=params["filters_per_layer"],
            nb_row=params["filter_width_1"],
            nb_col=params["filter_width_1"],
            init='uniform',
            activation='relu',
            border_mode='same'))

        # create all other layers
        for i in range(2, params["layers"] + 1):
            # use filter_width_K if it is there, otherwise use 3
            filter_key = "filter_width_%d" % i
            filter_width = params.get(filter_key, 3)
            network.add(convolutional.Convolution2D(
                nb_filter=params["filters_per_layer"],
                nb_row=filter_width,
                nb_col=filter_width,
                init='uniform',
                activation='relu',
                border_mode='same'))

        # the last layer maps each <filters_per_layer> feature to a number
        network.add(convolutional.Convolution2D(
            nb_filter=1,
            nb_row=1,
            nb_col=1,
            init='uniform',
            border_mode='same'))
        # flatten the output into a distribution over the whole board
        network.add(Flatten())
        # softmax makes it into a probability distribution
        network.add(Activation('softmax'))

        return network

    @staticmethod
    def load_model(json_file):
        """create a new CNNPolicy object from the architecture specified in json_file
        """
        with open(json_file, 'r') as f:
            object_specs = json.load(f)
        new_policy = CNNPolicy(object_specs['feature_list'])
        new_policy.model = model_from_json(object_specs['keras_model'])
        new_policy.forward = new_policy._model_forward()
        return new_policy

    def save_model(self, json_file):
        """write the network model and preprocessing features to the specified file
        """
        # this looks odd because we are serializing a model with json as a string
        # then making that the value of an object which is then serialized as
        # json again.
        # It's not as crazy as it looks. A CNNPolicy has 2 moving parts - the
        # feature preprocessing and the neural net, each of which gets a top-level
        # entry in the saved file. Keras just happens to serialize models with JSON
        # as well. Note how this format makes load_model fairly clean as well.
        object_specs = {
            'keras_model': self.model.to_json(),
            'feature_list': self.preprocessor.feature_list
        }
        # use the json module to write object_specs to file
        with open(json_file, 'w') as f:
            json.dump(object_specs, f)