def test_get_board(self):
    """Compare the three "board" planes of the 7x7 simple board with hand-written grids."""
    state = simple_board()
    side = state.get_size()
    preprocessor = Preprocess(["board"], size=7)
    # take the first tensor entry and move its leading (plane) axis to the end
    planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

    white_stones = np.asarray([
        [0, 0, 0, 0, 0, 0, 0],
        [1, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0]])
    black_stones = np.asarray([
        [1, 1, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0],
        [0, 0, 1, 0, 1, 0, 0],
        [0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0]])
    # whatever is neither white nor black is empty
    vacant = np.ones((side, side)) - white_stones - black_stones

    # one plane each for own stones, opponent stones and empty points
    self.assertEqual(planes.shape, (side, side, 3))

    # hand-coded expectation, assuming white is the current player
    expected = np.dstack((white_stones, black_stones, vacant))
    self.assertTrue(np.all(planes == expected))
def test_get_board(self):
    """Check the "board" feature planes of the simple board against literal grids."""
    state = simple_board()
    side = state.size
    extractor = Preprocess(["board"])
    # drop the leading tensor entry, then put the plane axis last
    planes = extractor.state_to_tensor(state)[0].transpose((1, 2, 0))

    white_grid = np.asarray([
        [0, 0, 0, 0, 0, 0, 0],
        [1, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1, 0, 0],
        [0, 0, 0, 0, 0, 0, 0]])
    black_grid = np.asarray([
        [1, 1, 1, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0],
        [0, 0, 1, 0, 1, 0, 0],
        [0, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0]])
    # every point not occupied by either colour
    empty_grid = np.ones((side, side)) - white_grid - black_grid

    # the feature must consist of exactly three planes
    self.assertEqual(planes.shape, (side, side, 3))

    # expectation is written for white as the current player
    stacked = np.dstack((white_grid, black_grid, empty_grid))
    self.assertTrue(np.all(planes == stacked))
def test_get_self_atari_size(self):
    """Expect all eight "self_atari_size" planes to be zero on the simple board."""
    # TODO - at the moment there is no imminent self-atari for white
    state = simple_board()
    extractor = Preprocess(["self_atari_size"])
    # first tensor entry, with the plane axis moved to the end
    planes = extractor.state_to_tensor(state)[0].transpose((1, 2, 0))

    nothing_marked = np.zeros((state.size, state.size, 8))
    self.assertTrue(np.all(planes == nothing_marked))
def test_get_liberties(self):
    """Check the one-hot "liberties" planes of the simple board, plane by plane."""
    state = simple_board()
    extractor = Preprocess(["liberties"])
    planes = extractor.state_to_tensor(state)[0].transpose((1, 2, 0))

    # todo - test liberties when > 8
    # plane index k holds the stones that have k + 1 liberties
    stones_by_plane = [
        # black piece at (4,4) has a single liberty: (4,3)
        (0, [(4, 4)]),
        # 2 liberties: the black group in the top left corner, and the
        # white pieces on the left and right of the eye
        (1, [(0, 0), (0, 1), (0, 2), (3, 4), (5, 4)]),
        # 3 liberties: the white group in the top left corner, the white
        # piece at (4,5), and the black pieces on the sides of the eye
        (2, [(1, 0), (1, 1), (4, 5), (3, 3), (5, 3)]),
        # the black piece at (4,2) has 4 liberties
        (3, [(4, 2)]),
    ]
    expected = np.zeros((state.size, state.size, 8))
    for plane, stones in stones_by_plane:
        for (x, y) in stones:
            expected[x, y, plane] = 1

    # compare per plane so a failure names the liberty count that is wrong
    for plane in range(8):
        same = np.all(planes[:, :, plane] == expected[:, :, plane])
        self.assertTrue(same, "bad expectation: stones with %d liberties" % (plane + 1))
def test_get_sensibleness(self):
    """Check "sensibleness" on a 9x9 board containing eyes and a suicide point.

    Every legal move should be marked except the labelled points 'x', 'y',
    'z' (eyes) and 's' (suicide).
    """
    state, labelled = parseboard.parse("x B . . W . . . .|"
                                       "B B W . . W . . .|"
                                       ". W B B W W . . .|"
                                       ". B y B W W . . .|"
                                       ". B B z B W . . .|"
                                       ". . B B B W . . .|"
                                       ". . . . . . . . W|"
                                       ". . . . . . . . W|"
                                       ". . . . . . . W s|")
    state.set_current_player(go.BLACK)

    extractor = Preprocess(["sensibleness"], size=9)
    # single-plane feature: index the plane directly, no transpose needed
    plane = extractor.state_to_tensor(state)[0, 0]

    sensible = np.zeros((state.get_size(), state.get_size()), dtype=int)
    for (x, y) in state.get_legal_moves():
        sensible[x, y] = 1
    # 'x', 'y', and 'z' are eyes - remove them from 'sensible' moves;
    # 's' is suicide - should not be legal
    for label in ('x', 'y', 'z', 's'):
        sensible[labelled[label]] = 0

    self.assertTrue(np.all(sensible == plane))
def test_get_liberties(self):
    """Verify the one-hot "liberties" planes for the 7x7 simple board."""
    state = simple_board()
    extractor = Preprocess(["liberties"], size=7)
    planes = extractor.state_to_tensor(state)[0].transpose((1, 2, 0))

    # todo - test liberties when > 8
    # maps plane index (liberty count minus one) to the stones expected there
    stones_by_plane = [
        # black piece at (4,4) has a single liberty: (4,3)
        (0, [(4, 4)]),
        # 2 liberties: the black group in the top left corner, and the
        # white pieces on the left and right of the eye
        (1, [(0, 0), (0, 1), (0, 2), (3, 4), (5, 4)]),
        # 3 liberties: the white group in the top left corner, the white
        # piece at (4,5), and the black pieces on the sides of the eye
        (2, [(1, 0), (1, 1), (4, 5), (3, 3), (5, 3)]),
        # the black piece at (4,2) has 4 liberties
        (3, [(4, 2)]),
    ]
    expected = np.zeros((state.get_size(), state.get_size(), 8))
    for plane, stones in stones_by_plane:
        for (x, y) in stones:
            expected[x, y, plane] = 1

    # one assertion per plane keeps the failure message specific
    for plane in range(8):
        same = np.all(planes[:, :, plane] == expected[:, :, plane])
        self.assertTrue(same, "bad expectation: stones with %d liberties" % (plane + 1))
def test_get_legal(self):
    """The "legal" plane must mark exactly the moves from get_legal_moves()."""
    state = simple_board()
    extractor = Preprocess(["legal"])
    # single-plane feature: index the plane directly, no transpose needed
    plane = extractor.state_to_tensor(state)[0, 0]

    legal_mask = np.zeros((state.size, state.size))
    for point in state.get_legal_moves():
        (x, y) = point
        legal_mask[x, y] = 1

    self.assertTrue(np.all(legal_mask == plane))
def test_get_legal(self):
    """On the 7x7 simple board the "legal" plane must equal the legal-move mask."""
    state = simple_board()
    extractor = Preprocess(["legal"], size=7)
    # single-plane feature: index the plane directly, no transpose needed
    plane = extractor.state_to_tensor(state)[0, 0]

    legal_mask = np.zeros((state.get_size(), state.get_size()))
    for point in state.get_legal_moves():
        (x, y) = point
        legal_mask[x, y] = 1

    self.assertTrue(np.all(legal_mask == plane))
def test_get_self_atari_size(self):
    """Check the "self_atari_size" planes on the 7x7 self-atari board."""
    state = self_atari_board()
    extractor = Preprocess(["self_atari_size"], size=7)
    planes = extractor.state_to_tensor(state)[0].transpose((1, 2, 0))

    # (x, y, plane) triples; plane index is the self-atari size minus one
    marked = [
        (0, 0, 0),  # self atari of size 1 at position 0,0
        (3, 4, 2),  # self atari of size 3 at position 3,4
    ]
    expected = np.zeros((state.get_size(), state.get_size(), 8))
    for (x, y, plane) in marked:
        expected[x, y, plane] = 1

    self.assertTrue(np.all(planes == expected))
def test_get_self_atari_size(self):
    """Check the "self_atari_size" planes on the self-atari board."""
    state = self_atari_board()
    extractor = Preprocess(["self_atari_size"])
    planes = extractor.state_to_tensor(state)[0].transpose((1, 2, 0))

    # (x, y, plane) triples; plane index is the self-atari size minus one
    marked = [
        (0, 0, 0),  # self atari of size 1 at position 0,0
        (3, 4, 2),  # self atari of size 3 at position 3,4
    ]
    expected = np.zeros((state.size, state.size, 8))
    for (x, y, plane) in marked:
        expected[x, y, plane] = 1

    self.assertTrue(np.all(planes == expected))
def test_get_sensibleness(self):
    """The "sensibleness" plane must mark legal moves that are not white eyes."""
    # TODO - there are no legal eyes at the moment
    state = simple_board()
    extractor = Preprocess(["sensibleness"])
    # single-plane feature: index the plane directly, no transpose needed
    plane = extractor.state_to_tensor(state)[0, 0]

    sensible = np.zeros((state.size, state.size))
    for (x, y) in state.get_legal_moves():
        if state.is_eye((x, y), go.WHITE):
            # filling one's own eye is legal but not sensible
            continue
        sensible[x, y] = 1

    self.assertTrue(np.all(sensible == plane))
def test_get_self_atari_size_cap(self):
    """Check the "self_atari_size" planes on the capture board."""
    state = capture_board()
    extractor = Preprocess(["self_atari_size"])
    planes = extractor.state_to_tensor(state)[0].transpose((1, 2, 0))

    # (x, y, plane) triples; plane index is the self-atari size minus one
    marked = [
        # self atari of size 1 at the ko position and just below it
        (4, 5, 0),
        (3, 6, 0),
        # self atari of size 3 at bottom corner
        (6, 6, 2),
    ]
    expected = np.zeros((state.size, state.size, 8))
    for (x, y, plane) in marked:
        expected[x, y, plane] = 1

    self.assertTrue(np.all(planes == expected))
def test_get_self_atari_size_cap(self):
    """Check the "self_atari_size" planes on the 7x7 capture board."""
    state = capture_board()
    extractor = Preprocess(["self_atari_size"], size=7)
    planes = extractor.state_to_tensor(state)[0].transpose((1, 2, 0))

    # (x, y, plane) triples; plane index is the self-atari size minus one
    marked = [
        # self atari of size 1 at the ko position and just below it
        (4, 5, 0),
        (3, 6, 0),
        # self atari of size 3 at bottom corner
        (6, 6, 2),
    ]
    expected = np.zeros((state.get_size(), state.get_size(), 8))
    for (x, y, plane) in marked:
        expected[x, y, plane] = 1

    self.assertTrue(np.all(planes == expected))
def test_get_ladder_capture(self):
    """Only the labelled point 'a' may be flagged by "ladder_capture"."""
    state, labelled = parseboard.parse(". . . . . . .|"
                                       "B W a . . . .|"
                                       ". B . . . . .|"
                                       ". . . . . . .|"
                                       ". . . . . . .|"
                                       ". . . . . W .|")
    extractor = Preprocess(["ladder_capture"], size=7)
    # single-plane feature: index the plane directly, no transpose needed
    plane = extractor.state_to_tensor(state)[0, 0]

    side = state.get_size()
    only_a = np.zeros((side, side))
    only_a[labelled['a']] = 1

    self.assertTrue(np.all(only_a == plane))
def test_get_capture_size(self):
    """On the simple board every legal move should land in the zero-capture plane."""
    # TODO - at the moment there is no imminent capture
    state = simple_board()
    extractor = Preprocess(["capture_size"])
    planes = extractor.state_to_tensor(state)[0].transpose((1, 2, 0))

    expected = np.zeros((state.size, state.size, 8))
    # there is no capture available; all legal moves are zero-capture
    for point in state.get_legal_moves():
        (x, y) = point
        expected[x, y, 0] = 1

    # compare per plane so a failure names the capture size that is wrong
    for plane in range(8):
        same = np.all(planes[:, :, plane] == expected[:, :, plane])
        self.assertTrue(same, "bad expectation: capturing %d stones" % plane)
def test_get_turns_since(self):
    """Check "turns_since": one plane per recent move, the last plane for all older ones."""
    state = simple_board()
    extractor = Preprocess(["turns_since"])
    planes = extractor.state_to_tensor(state)[0].transpose((1, 2, 0))

    expected = np.zeros((state.size, state.size, 8))
    newest_first = state.history[::-1]

    # one plane per move for the last 7
    for age in range(7):
        recent = newest_first[age]
        expected[recent[0], recent[1], age] = 1

    # far back plane gets all other moves
    older_moves = newest_first[7:]
    for old in older_moves:
        expected[old[0], old[1], 7] = 1

    self.assertTrue(np.all(planes == expected))
def test_get_ladder_escape(self):
    """Only the labelled point 'a' may be flagged by "ladder_escape" for white."""
    # On this board, playing at 'a' is ladder escape because there is a breaker on the right.
    state, labelled = parseboard.parse(". B B . . . .|"
                                       "B W a . . . .|"
                                       ". B . . . . .|"
                                       ". . . . . W .|"
                                       ". . . . . . .|"
                                       ". . . . . . .|")
    extractor = Preprocess(["ladder_escape"], size=7)
    state.set_current_player(go.WHITE)
    # single-plane feature: index the plane directly, no transpose needed
    plane = extractor.state_to_tensor(state)[0, 0]

    side = state.get_size()
    only_a = np.zeros((side, side))
    only_a[labelled['a']] = 1

    self.assertTrue(np.all(only_a == plane))
def test_get_turns_since(self):
    """Check "turns_since" using the age of the latest move on each occupied point."""
    state = simple_board()
    extractor = Preprocess(["turns_since"])
    planes = extractor.state_to_tensor(state)[0].transpose((1, 2, 0))

    expected = np.zeros((state.size, state.size, 8))
    newest_first = state.history[::-1]
    for x in range(state.size):
        for y in range(state.size):
            if state.board[x, y] == go.EMPTY:
                continue
            # position of (x, y) in the reversed history = age of the most
            # recent move there; ages of 7 and above share the last plane
            turns_ago = newest_first.index((x, y))
            expected[x, y, min(turns_ago, 7)] = 1

    self.assertTrue(np.all(planes == expected))
def test_get_capture_size(self):
    """Check "capture_size" on the capture board by playing out every legal move."""
    state = capture_board()
    extractor = Preprocess(["capture_size"])
    planes = extractor.state_to_tensor(state)[0].transpose((1, 2, 0))

    prisoners_before = state.num_white_prisoners
    expected = np.zeros((state.size, state.size, 8))
    for (x, y) in state.get_legal_moves():
        # play the move on a copy and see how much the prisoner count grew
        trial = state.copy()
        trial.do_move((x, y))
        gained = trial.num_white_prisoners - prisoners_before
        # captures of 7 or more all go in the last plane
        expected[x, y, min(7, gained)] = 1

    # compare per plane so a failure names the capture size that is wrong
    for plane in range(8):
        same = np.all(planes[:, :, plane] == expected[:, :, plane])
        self.assertTrue(same, "bad expectation: capturing %d stones" % plane)
def test_get_liberties_after_cap(self):
    """Check "liberties_after" on the capture board, where captures are imminent.

    Mirrors test_get_liberties_after but starts from capture_board().
    """
    state = capture_board()
    extractor = Preprocess(["liberties_after"])
    planes = extractor.state_to_tensor(state)[0].transpose((1, 2, 0))

    expected = np.zeros((state.size, state.size, 8))
    for (x, y) in state.get_legal_moves():
        # play the move on a copy and read the liberty count of the new stone
        trial = state.copy()
        trial.do_move((x, y))
        liberties = trial.liberty_counts[x, y]
        # plane k means k + 1 liberties; the last plane absorbs anything larger
        expected[x, y, min(liberties - 1, 7)] = 1

    for plane in range(8):
        same = np.all(planes[:, :, plane] == expected[:, :, plane])
        self.assertTrue(
            same,
            "bad expectation: stones with %d liberties after move" % (plane + 1))
def test_get_liberties_after(self):
    """Check "liberties_after" on the simple board by playing out every legal move.

    The expectation is one-hot over 8 planes: plane k marks moves whose
    stone ends up with k + 1 liberties, and the last plane collects
    everything with 8 or more.
    """
    gs = simple_board()
    pp = Preprocess(["liberties_after"])
    feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0))

    one_hot_liberties = np.zeros((gs.size, gs.size, 8))

    # TODO (?) hand-code?
    for (x, y) in gs.get_legal_moves():
        # play the move on a copy and read the liberty count of the new stone
        trial = gs.copy()
        trial.do_move((x, y))
        libs = trial.liberty_counts[x, y]
        # Clamp with min(): exactly 7 liberties belongs in plane 6. The
        # previous `if libs < 7 ... else plane 7` sent it to the overflow
        # plane, disagreeing with test_get_liberties_after_cap.
        one_hot_liberties[x, y, min(libs - 1, 7)] = 1

    # compare per plane so a failure names the liberty count that is wrong
    for i in range(8):
        self.assertTrue(
            np.all(feature[:, :, i] == one_hot_liberties[:, :, i]),
            "bad expectation: stones with %d liberties after move" % (i + 1))
def test_feature_concatenation(self):
    """Requesting several features must stack their planes in the requested order."""
    state = simple_board()
    extractor = Preprocess(["board", "sensibleness", "capture_size"])
    planes = extractor.state_to_tensor(state)[0].transpose((1, 2, 0))

    # 3 board planes + 1 sensibleness plane + 8 capture-size planes
    expected = np.zeros((state.size, state.size, 3 + 1 + 8))

    # first three planes: board
    for plane, colour in enumerate((go.WHITE, go.BLACK, go.EMPTY)):
        expected[:, :, plane] = (state.board == colour) * 1

    # 4th plane: sensibleness (as in test_get_sensibleness)
    for (x, y) in state.get_legal_moves():
        if state.is_eye((x, y), go.WHITE):
            continue
        expected[x, y, 3] = 1

    # 5th through 12th plane: capture size (all zero-capture)
    for (x, y) in state.get_legal_moves():
        expected[x, y, 4] = 1

    self.assertTrue(np.all(expected == planes))
def test_two_escapes(self):
    """After white plays 'c', both 'a' and 'b' must be flagged as ladder escapes."""
    state, labelled = parseboard.parse(". . X . . .|"
                                       ". X O a . .|"
                                       ". X c X . .|"
                                       ". O X b . .|"
                                       ". . O . . .|"
                                       ". . . . . .|")

    # place a white stone at c, and reset player to white
    state.do_move(labelled['c'], color=go.WHITE)
    state.set_current_player(go.WHITE)

    extractor = Preprocess(["ladder_escape"], size=6)
    # the original sequence sets the player again after building the preprocessor
    state.set_current_player(go.WHITE)
    # single-plane feature: index the plane directly, no transpose needed
    plane = extractor.state_to_tensor(state)[0, 0]

    # both 'a' and 'b' should be considered escape moves for white after 'O' at c
    side = state.get_size()
    escapes = np.zeros((side, side))
    for label in ('a', 'b'):
        escapes[labelled[label]] = 1

    self.assertTrue(np.all(escapes == plane))
def is_ladder_capture(state, move):
    """Report whether the "ladder_capture" feature is set at `move` for `state`.

    A preprocessor sized to the state's board computes the feature; the
    tensor is squeezed and the entry at `move` is compared with 1.
    """
    extractor = Preprocess(["ladder_capture"], size=state.get_size())
    plane = extractor.state_to_tensor(state).squeeze()
    flagged = plane[move] == 1
    return flagged
class CNNPolicy(object):
    """Policy that scores moves with a convolutional neural network.

    A game state is converted to feature planes by a Preprocess object and
    pushed through a keras model; the model output is read as a probability
    for each board position.
    """

    def __init__(self, feature_list, **kwargs):
        """Set up preprocessing for `feature_list` and build a fresh network.

        Keyword arguments are handed to create_network(); "input_dim" is
        always overwritten with the preprocessor's output depth.
        """
        self.preprocessor = Preprocess(feature_list)
        kwargs["input_dim"] = self.preprocessor.output_dim
        self.model = CNNPolicy.create_network(**kwargs)
        self.forward = self._model_forward()

    def _model_forward(self):
        """Build a plain forward-pass function on the current keras backend.

        Unlike model.compile(), no loss function or training method is
        involved: the returned callable takes a batch of inputs and gives
        back the network output.
        c.f. https://github.com/fchollet/keras/issues/1426
        """
        net_input = self.model.get_input(train=False)
        net_output = self.model.get_output(train=False)
        run_batch = K.function([net_input], [net_output])

        def forward(inpt):
            # run_batch returns a list of tensors: the first [0] picks the
            # front tensor, which is (1, width, height); the second [0]
            # reduces it to (width, height)
            return run_batch(inpt)[0][0]

        return forward

    def batch_eval_state(self, state_gen, batch=16):
        """Evaluate a stream of states in batches to make best use of the GPU.

        Not implemented yet. The return format is undecided (a stream of
        results would break zip(); pre-zipped (state, result) pairs are an
        option).
        """
        raise NotImplementedError()

    def eval_state(self, state):
        """Return a list of (action, probability) pairs for a GameState.

        Only legal moves are listed, so the values need not sum to one.
        """
        tensor = self.preprocessor.state_to_tensor(state)
        # run the tensor through the network
        activations = self.forward([tensor])

        # collect network activations at legal move locations
        scored_moves = []
        for (x, y) in state.get_legal_moves():
            scored_moves.append(((x, y), activations[x, y]))
        return scored_moves

    @staticmethod
    def create_network(**kwargs):
        """Construct a convolutional neural network.

        Keyword Arguments:
        - input_dim:         depth of features to be processed by first layer (no default)
        - board:             width of the go board to be processed (default 19)
        - filters_per_layer: number of filters used on every layer (default 128)
        - layers:            number of convolutional steps (default 12)
        - filter_width_K:    (where K is between 1 and <layers>) width of filter on
                             layer K (default 3 except 1st layer which defaults to 5).
                             Must be odd.
        """
        # start from the defaults, then let the caller's values win
        params = {
            "board": 19,
            "filters_per_layer": 128,
            "layers": 12,
            "filter_width_1": 5
        }
        params.update(kwargs)

        board = params["board"]
        n_filters = params["filters_per_layer"]
        first_width = params["filter_width_1"]

        # 'same' border mode on every convolution keeps the output board x board
        network = Sequential()

        # first layer: the only one that needs to know the input shape
        network.add(convolutional.Convolution2D(
            input_shape=(params["input_dim"], board, board),
            nb_filter=n_filters,
            nb_row=first_width,
            nb_col=first_width,
            init='uniform',
            activation='relu',
            border_mode='same'))

        # layers 2..N use filter_width_K when supplied, otherwise width 3
        for layer_no in range(2, params["layers"] + 1):
            width = params.get("filter_width_%d" % layer_no, 3)
            network.add(convolutional.Convolution2D(
                nb_filter=n_filters,
                nb_row=width,
                nb_col=width,
                init='uniform',
                activation='relu',
                border_mode='same'))

        # the last layer maps each <filters_per_layer> feature to a number
        network.add(convolutional.Convolution2D(
            nb_filter=1,
            nb_row=1,
            nb_col=1,
            init='uniform',
            border_mode='same'))
        # reshape output to be board x board
        network.add(Reshape((board, board)))
        # softmax makes it into a probability distribution
        network.add(Activation('softmax'))

        return network

    @staticmethod
    def load_model(json_file):
        """Create a new CNNPolicy from the architecture stored in json_file."""
        with open(json_file, 'r') as handle:
            specs = json.load(handle)

        policy = CNNPolicy(specs['feature_list'])
        # swap the freshly built network for the stored one and rebuild the
        # forward function so it runs the stored model
        policy.model = model_from_json(specs['keras_model'])
        policy.forward = policy._model_forward()
        return policy

    def save_model(self, json_file):
        """Write the network model and the preprocessing features to json_file."""
        # A CNNPolicy has 2 moving parts - the feature preprocessing and the
        # neural net - and each gets a top-level entry in the saved file.
        # Keras serializes models as a JSON string, so that string ends up
        # nested inside our own JSON document; load_model reverses both steps.
        specs = {
            'keras_model': self.model.to_json(),
            'feature_list': self.preprocessor.feature_list
        }
        with open(json_file, 'w') as handle:
            json.dump(specs, handle)
class game_converter:
    """Convert SGF game records into feature tensors stored in an HDF5 file."""

    def __init__(self, features):
        """Create the feature preprocessor for the given list of feature names."""
        self.feature_processor = Preprocess(features)
        self.n_features = self.feature_processor.output_dim

    def convert_game(self, file_name, bd_size):
        """Read the given SGF file into an iterable of (input,output) pairs
        for neural network training

        Each input is a GameState converted into one-hot neural net features
        Each output is an action as an (x,y) pair (passes are skipped)

        If this game's size does not match bd_size, a SizeMismatchError is raised
        """
        # the file is read in full here, so it can be closed before iterating
        with open(file_name, 'r') as file_object:
            state_action_iterator = sgf_iter_states(file_object.read(), include_end=False)

        for (state, move, player) in state_action_iterator:
            if state.size != bd_size:
                raise SizeMismatchError()
            if move != go.PASS_MOVE:
                nn_input = self.feature_processor.state_to_tensor(state)
                yield (nn_input, move)

    def sgfs_to_hdf5(self, sgf_files, hdf5_file, bd_size=19, ignore_errors=True, verbose=False):
        """Convert all files in the iterable sgf_files into an hdf5 group to be stored in hdf5_file

        Arguments:
        - sgf_files : an iterable of relative or absolute paths to SGF files
        - hdf5_file : the name of the HDF5 where features will be saved
        - bd_size : side length of board of games that are loaded
        - ignore_errors : if True, issues a Warning when there is an unknown exception rather
          than halting. Note that sgf.ParseException and go.IllegalMove exceptions are always
          skipped
        - verbose : if True, print progress information

        The resulting file has the following properties:
            states  : dataset with shape (n_data, n_features, board width, board height)
            actions : dataset with shape (n_data, 2) (actions are stored as x,y tuples of
                      where the move was played)
            file_offsets : group mapping from filenames to tuples of (index, length)

        For example, to find what positions in the dataset come from 'test.sgf':
            index, length = file_offsets['test.sgf']
            test_states = states[index:index+length]
            test_actions = actions[index:index+length]

        If anything goes wrong outside the per-file handling, the temporary
        file is closed and deleted and the exception is re-raised.
        """
        # TODO - also save feature list

        # make a hidden temporary file in case of a crash.
        # on success, this is renamed to hdf5_file
        tmp_file = os.path.join(os.path.dirname(hdf5_file), ".tmp." + os.path.basename(hdf5_file))
        h5f = h5.File(tmp_file, 'w')

        try:
            # see http://docs.h5py.org/en/latest/high/group.html#Group.create_dataset
            states = h5f.require_dataset(
                'states',
                dtype=np.uint8,
                shape=(1, self.n_features, bd_size, bd_size),
                # 'None' dimension allows it to grow arbitrarily
                maxshape=(None, self.n_features, bd_size, bd_size),
                # allow non-uint8 datasets to be loaded, coerced to uint8
                exact=False,
                # approximately 1MB chunks
                chunks=(64, self.n_features, bd_size, bd_size),
                compression="lzf")
            actions = h5f.require_dataset(
                'actions',
                dtype=np.uint8,
                shape=(1, 2),
                maxshape=(None, 2),
                exact=False,
                chunks=(1024, 2),
                compression="lzf")
            # 'file_offsets' is an HDF5 group so that 'file_name in file_offsets' is fast
            file_offsets = h5f.require_group('file_offsets')

            if verbose:
                print("created HDF5 dataset in {}".format(tmp_file))

            next_idx = 0
            for file_name in sgf_files:
                if verbose:
                    print(file_name)
                # count number of state/action pairs yielded by this game
                n_pairs = 0
                file_start_idx = next_idx
                try:
                    for state, move in self.convert_game(file_name, bd_size):
                        # grow both datasets by one row when the next slot does not exist yet
                        if next_idx >= len(states):
                            states.resize((next_idx + 1, self.n_features, bd_size, bd_size))
                            actions.resize((next_idx + 1, 2))
                        states[next_idx] = state
                        actions[next_idx] = move
                        n_pairs += 1
                        next_idx += 1
                except go.IllegalMove:
                    warnings.warn("Illegal Move encountered in %s\n\tdropping the remainder of the game" % file_name)
                except sgf.ParseException:
                    warnings.warn("Could not parse %s\n\tdropping game" % file_name)
                except SizeMismatchError:
                    warnings.warn("Skipping %s; wrong board size" % file_name)
                except Exception as e:
                    # catch everything else
                    if ignore_errors:
                        warnings.warn("Unkown exception with file %s\n\t%s" % (file_name, e), stacklevel=2)
                    else:
                        # bare raise keeps the original traceback
                        raise
                finally:
                    # pairs written before an error are kept, so record them either way
                    if n_pairs > 0:
                        # '/' has special meaning in HDF5 key names, so they are replaced with ':' here
                        file_name_key = file_name.replace('/', ':')
                        file_offsets[file_name_key] = [file_start_idx, n_pairs]
                        if verbose:
                            print("\t%d state/action pairs extracted" % n_pairs)
                    elif verbose:
                        print("\t-no usable data-")
        except Exception:
            print("sgfs_to_hdf5 failed")
            # close the handle before deleting: otherwise it leaks, and the
            # file is removed while still open
            h5f.close()
            os.remove(tmp_file)
            raise

        if verbose:
            print("finished. renaming %s to %s" % (tmp_file, hdf5_file))

        # processing complete; rename tmp_file to hdf5_file
        h5f.close()
        os.rename(tmp_file, hdf5_file)
class CNNPolicy(object):
    """uses a convolutional neural network to evaluate the state of the game
    and compute a probability distribution over the next action
    """

    def __init__(self, feature_list, **kwargs):
        """create a policy object that preprocesses according to feature_list and uses
        a neural network specified by keyword arguments (see create_network())
        """
        self.preprocessor = Preprocess(feature_list)
        # the network input depth always follows the preprocessor
        kwargs["input_dim"] = self.preprocessor.output_dim
        self.model = CNNPolicy.create_network(**kwargs)
        self.forward = self._model_forward()

    def _model_forward(self):
        """Construct a function using the current keras backend that, when given a batch
        of inputs, simply processes them forward and returns the output

        The output has size (batch x 361) for 19x19 boards (i.e. the output is a batch
        of distributions over flattened boards. See AlphaGo.util#flatten_idx)

        This is as opposed to model.compile(), which takes a loss function
        and training method.

        c.f. https://github.com/fchollet/keras/issues/1426
        """
        forward_function = K.function([self.model.input], [self.model.output])

        # the forward_function returns a list of tensors
        # the first [0] gets the front tensor.
        return lambda inpt: forward_function([inpt])[0]

    def batch_eval_state(self, state_gen, batch=16):
        """Given a stream of states in state_gen, evaluates them in batches
        to make best use of GPU resources.

        Returns: TBD (stream of results? that would break zip().
                      streaming pairs of pre-zipped (state, result)?)
        """
        raise NotImplementedError()

    def eval_state(self, state, moves=None):
        """Given a GameState object, returns a list of (action, probability) pairs
        according to the network outputs

        If a list of moves is specified, only those moves are kept in the distribution;
        when `moves` is None or empty, the state's legal moves are used. The
        probabilities are renormalized over the moves that are kept.
        """
        moves = moves or state.get_legal_moves()
        if not moves:
            # nothing to score; also avoids normalizing an empty distribution
            return []

        tensor = self.preprocessor.state_to_tensor(state)

        # run the tensor through the network
        network_output = self.forward(tensor)

        move_indices = [flatten_idx(m, state.size) for m in moves]

        # get network activations at the selected move locations and
        # renormalize, since dropping the other moves removes probability mass
        distribution = network_output[0][move_indices]
        distribution = distribution / distribution.sum()

        # materialize the pairs: a bare zip() is a one-shot iterator on
        # Python 3, while the documented return value is a list
        return list(zip(moves, distribution))

    @staticmethod
    def create_network(**kwargs):
        """construct a convolutional neural network.

        Keyword Arguments:
        - input_dim:         depth of features to be processed by first layer (no default)
        - board:             width of the go board to be processed (default 19)
        - filters_per_layer: number of filters used on every layer (default 128)
        - layers:            number of convolutional steps (default 12)
        - filter_width_K:    (where K is between 1 and <layers>) width of filter on
                             layer K (default 3 except 1st layer which defaults to 5).
                             Must be odd.
        """
        defaults = {
            "board": 19,
            "filters_per_layer": 128,
            "layers": 12,
            "filter_width_1": 5
        }
        # copy defaults, but override with anything in kwargs
        params = dict(defaults)
        params.update(kwargs)

        # create the network:
        # a series of zero-paddings followed by convolutions
        # such that the output dimensions are also board x board
        network = Sequential()

        # create first layer
        network.add(
            convolutional.Convolution2D(
                input_shape=(params["input_dim"], params["board"], params["board"]),
                nb_filter=params["filters_per_layer"],
                nb_row=params["filter_width_1"],
                nb_col=params["filter_width_1"],
                init='uniform',
                activation='relu',
                border_mode='same'))

        # create all other layers
        for i in range(2, params["layers"] + 1):
            # use filter_width_K if it is there, otherwise use 3
            filter_key = "filter_width_%d" % i
            filter_width = params.get(filter_key, 3)
            network.add(
                convolutional.Convolution2D(
                    nb_filter=params["filters_per_layer"],
                    nb_row=filter_width,
                    nb_col=filter_width,
                    init='uniform',
                    activation='relu',
                    border_mode='same'))

        # the last layer maps each <filters_per_layer> feature to a number
        network.add(
            convolutional.Convolution2D(
                nb_filter=1,
                nb_row=1,
                nb_col=1,
                init='uniform',
                border_mode='same'))
        # flatten the output so each sample is a single vector
        network.add(Flatten())
        # softmax makes it into a probability distribution
        network.add(Activation('softmax'))

        return network

    @staticmethod
    def load_model(json_file):
        """create a new CNNPolicy object from the architecture specified in json_file
        """
        with open(json_file, 'r') as f:
            object_specs = json.load(f)
        new_policy = CNNPolicy(object_specs['feature_list'])
        # replace the freshly built network with the stored one and rebuild
        # the forward function so it runs the stored model
        new_policy.model = model_from_json(object_specs['keras_model'])
        new_policy.forward = new_policy._model_forward()
        return new_policy

    def save_model(self, json_file):
        """write the network model and preprocessing features to the specified file
        """
        # this looks odd because we are serializing a model with json as a string
        # then making that the value of an object which is then serialized as
        # json again.
        # It's not as crazy as it looks. A CNNPolicy has 2 moving parts - the
        # feature preprocessing and the neural net, each of which gets a top-level
        # entry in the saved file. Keras just happens to serialize models with JSON
        # as well. Note how this format makes load_model fairly clean as well.
        object_specs = {
            'keras_model': self.model.to_json(),
            'feature_list': self.preprocessor.feature_list
        }
        # use the json module to write object_specs to file
        with open(json_file, 'w') as f:
            json.dump(object_specs, f)