def test_get_board(self):
		"""Check the three "board" planes against a hand-written position."""
		state = simple_board()
		preprocessor = Preprocess(["board"])
		# put the plane axis last so that planes[x, y, k] addresses plane k
		planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

		white_stones = np.asarray([
			[0, 0, 0, 0, 0, 0, 0],
			[1, 1, 0, 0, 0, 0, 0],
			[0, 0, 0, 0, 0, 0, 0],
			[0, 0, 0, 0, 1, 0, 0],
			[0, 0, 0, 0, 0, 1, 0],
			[0, 0, 0, 0, 1, 0, 0],
			[0, 0, 0, 0, 0, 0, 0]])
		black_stones = np.asarray([
			[1, 1, 1, 0, 0, 0, 0],
			[0, 0, 0, 0, 0, 0, 0],
			[0, 0, 0, 0, 0, 0, 0],
			[0, 0, 0, 1, 0, 0, 0],
			[0, 0, 1, 0, 1, 0, 0],
			[0, 0, 0, 1, 0, 0, 0],
			[0, 0, 0, 0, 0, 0, 0]])
		# every point that holds neither colour is empty
		side = state.size
		vacant = np.ones((side, side)) - (white_stones + black_stones)

		# the feature must consist of exactly three planes
		self.assertEqual(planes.shape, (side, side, 3))
		# plane order is white, black, empty (white being the current player)
		expected = np.dstack((white_stones, black_stones, vacant))
		self.assertTrue(np.all(planes == expected))
示例#2
0
    def test_get_self_atari_size(self):
        """On the simple board all eight "self_atari_size" planes must be zero."""
        # TODO - at the moment there is no imminent self-atari for white
        state = simple_board()
        preprocessor = Preprocess(["self_atari_size"])
        # put the plane axis last so it lines up with the expectation below
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        all_zero = np.zeros((state.size, state.size, 8))
        self.assertTrue(np.all(planes == all_zero))
	def test_get_liberties(self):
		"""Compare the "liberties" planes with hand-counted liberties on the simple board."""
		state = simple_board()
		preprocessor = Preprocess(["liberties"])
		planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

		# todo - test liberties when > 8

		# liberty count -> board locations (row, column or column slice) of the
		# stones with that many liberties
		stones_by_liberties = {
			# black piece at (4,4) has a single liberty: (4,3)
			1: [(4, 4)],
			# the black group in the top left corner, and the white pieces
			# on the left and right of the eye
			2: [(0, slice(0, 3)), (3, 4), (5, 4)],
			# the white group in the top left corner, the white piece at (4,5)
			# and the black pieces on the sides of the eye
			3: [(1, slice(0, 2)), (4, 5), (3, 3), (5, 3)],
			# the black piece at (4,2)
			4: [(4, 2)],
		}

		expected = np.zeros((state.size, state.size, 8))
		for liberties, locations in stones_by_liberties.items():
			for row, col in locations:
				# plane k marks stones with k + 1 liberties
				expected[row, col, liberties - 1] = 1

		for plane in range(8):
			actual_plane = planes[:, :, plane]
			expected_plane = expected[:, :, plane]
			self.assertTrue(
				np.all(actual_plane == expected_plane),
				"bad expectation: stones with %d liberties" % (plane + 1))
示例#4
0
    def test_get_board(self):
        """Check the three "board" planes of a 7x7 position against a hand-written layout."""
        state = simple_board()
        preprocessor = Preprocess(["board"], size=7)
        # put the plane axis last so that planes[x, y, k] addresses plane k
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        white_stones = np.asarray([
            [0, 0, 0, 0, 0, 0, 0],
            [1, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 1, 0],
            [0, 0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, 0, 0, 0]])
        black_stones = np.asarray([
            [1, 1, 1, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 1, 0, 0, 0],
            [0, 0, 1, 0, 1, 0, 0],
            [0, 0, 0, 1, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0]])
        # every point that holds neither colour is empty
        side = state.get_size()
        vacant = np.ones((side, side)) - (white_stones + black_stones)

        # the feature must consist of exactly three planes
        self.assertEqual(planes.shape, (side, side, 3))
        # plane order is white, black, empty (white being the current player)
        expected = np.dstack((white_stones, black_stones, vacant))
        self.assertTrue(np.all(planes == expected))
示例#5
0
    def test_get_liberties(self):
        """Compare the "liberties" planes with hand-counted liberties on the simple board."""
        state = simple_board()
        preprocessor = Preprocess(["liberties"], size=7)
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        # todo - test liberties when > 8

        # liberty count -> board locations (row, column or column slice) of the
        # stones with that many liberties
        stones_by_liberties = {
            # black piece at (4,4) has a single liberty: (4,3)
            1: [(4, 4)],
            # the black group in the top left corner, and the white pieces
            # on the left and right of the eye
            2: [(0, slice(0, 3)), (3, 4), (5, 4)],
            # the white group in the top left corner, the white piece at (4,5)
            # and the black pieces on the sides of the eye
            3: [(1, slice(0, 2)), (4, 5), (3, 3), (5, 3)],
            # the black piece at (4,2)
            4: [(4, 2)],
        }

        side = state.get_size()
        expected = np.zeros((side, side, 8))
        for liberties, locations in stones_by_liberties.items():
            for row, col in locations:
                # plane k marks stones with k + 1 liberties
                expected[row, col, liberties - 1] = 1

        for plane in range(8):
            actual_plane = planes[:, :, plane]
            expected_plane = expected[:, :, plane]
            self.assertTrue(
                np.all(actual_plane == expected_plane),
                "bad expectation: stones with %d liberties" % (plane + 1))
示例#6
0
    def test_get_sensibleness(self):
        """Legal moves minus the marked eyes (and the suicide point) are the sensible moves."""
        diagram = ("x B . . W . . . .|"
                   "B B W . . W . . .|"
                   ". W B B W W . . .|"
                   ". B y B W W . . .|"
                   ". B B z B W . . .|"
                   ". . B B B W . . .|"
                   ". . . . . . . . W|"
                   ". . . . . . . . W|"
                   ". . . . . . . W s|")
        state, marked = parseboard.parse(diagram)
        state.set_current_player(go.BLACK)

        preprocessor = Preprocess(["sensibleness"], size=9)
        # single-plane feature: drop the batch and plane axes, nothing to transpose
        sensible_plane = preprocessor.state_to_tensor(state)[0, 0]

        side = state.get_size()
        expected = np.zeros((side, side), dtype=int)

        # start from every legal move ...
        for (row, col) in state.get_legal_moves():
            expected[row, col] = 1

        # ... then clear the eyes 'x', 'y' and 'z', which are not sensible,
        # and 's', which is suicide and should not be legal in the first place
        for label in ('x', 'y', 'z', 's'):
            expected[marked[label]] = 0

        self.assertTrue(np.all(expected == sensible_plane))
示例#7
0
	def test_get_self_atari_size(self):
		"""On the simple board all eight "self_atari_size" planes must be zero."""
		# TODO - at the moment there is no imminent self-atari for white
		state = simple_board()
		preprocessor = Preprocess(["self_atari_size"])
		# put the plane axis last so it lines up with the expectation below
		planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

		all_zero = np.zeros((state.size, state.size, 8))
		self.assertTrue(np.all(planes == all_zero))
示例#8
0
def validate_feature_planes(verbose, dataset, model_features):
    """Verify that dataset's features match the model's expected features.

    Args:
        verbose: when truthy, print a confirmation message on success.
        dataset: mapping-like container (presumably an open HDF5 file - confirm
            against the caller). If it has a 'features' entry, that entry is
            read with ``[()]`` and must be a comma-separated string (text or
            bytes) of feature names. Otherwise ``dataset["states"].shape[1]``
            is used as the number of feature planes in the dataset.
        model_features: ordered sequence of feature names the model expects.

    Raises:
        ValueError: if the feature names (new dataset format) or the number of
            planes (old dataset format) disagree with the model.
    """

    if 'features' in dataset:
        raw_features = dataset['features'][()]
        # The stored string may be returned as bytes rather than text; bytes
        # cannot be split on a text separator, so normalise to text first.
        if isinstance(raw_features, bytes):
            raw_features = raw_features.decode("utf-8")
        dataset_features = raw_features.split(",")
        # names must agree pairwise and neither list may be longer than the other
        if len(dataset_features) != len(model_features) or \
           any(df != mf for (df, mf) in zip(dataset_features, model_features)):
            raise ValueError(
                "Model JSON file expects features \n\t%s\n"
                "But dataset contains \n\t%s" %
                ("\n\t".join(model_features), "\n\t".join(dataset_features)))
        elif verbose:
            print(
                "Verified that dataset features and model features exactly match."
            )
    else:
        # Cannot check each feature, but can check number of planes.
        n_dataset_planes = dataset["states"].shape[1]
        # a throwaway preprocessor is built only to ask how many planes the
        # model's feature list produces
        tmp_preprocess = Preprocess(model_features)
        n_model_planes = tmp_preprocess.get_output_dimension()
        if n_dataset_planes != n_model_planes:
            raise ValueError(
                "Model JSON file expects a total of %d planes from features \n\t%s\n"
                "But dataset contains %d planes" %
                (n_model_planes, "\n\t".join(model_features),
                 n_dataset_planes))
        elif verbose:
            print(
                "Verified agreement of number of model and dataset feature planes, but cannot "
                "verify exact match using old dataset format.")
示例#9
0
    def __init__(self, feature_list, **kwargs):
        """Set up the preprocessor for feature_list and build the network.

        The remaining keyword arguments are forwarded to
        CNNPolicy.create_network(); "input_dim" is always set to the
        preprocessor's output_dim.
        """
        self.preprocessor = Preprocess(feature_list)
        # the network's input depth has to equal what the preprocessor emits
        network_args = dict(kwargs, input_dim=self.preprocessor.output_dim)
        self.model = CNNPolicy.create_network(**network_args)
        self.forward = self._model_forward()
	def test_get_legal(self):
		"""The "legal" plane must be 1 exactly at the state's legal moves."""
		state = simple_board()
		preprocessor = Preprocess(["legal"])
		# single-plane feature: drop the batch and plane axes, nothing to transpose
		legal_plane = preprocessor.state_to_tensor(state)[0, 0]

		expected = np.zeros((state.size, state.size))
		for (row, col) in state.get_legal_moves():
			expected[row, col] = 1
		self.assertTrue(np.all(expected == legal_plane))
示例#11
0
    def test_get_legal(self):
        """The "legal" plane must be 1 exactly at the state's legal moves."""
        state = simple_board()
        preprocessor = Preprocess(["legal"], size=7)
        # single-plane feature: drop the batch and plane axes, nothing to transpose
        legal_plane = preprocessor.state_to_tensor(state)[0, 0]

        side = state.get_size()
        expected = np.zeros((side, side))
        for (row, col) in state.get_legal_moves():
            expected[row, col] = 1
        self.assertTrue(np.all(expected == legal_plane))
	def test_get_self_atari_size(self):
		"""Check the "self_atari_size" planes on a board with two self-atari moves."""
		state = self_atari_board()
		preprocessor = Preprocess(["self_atari_size"])
		planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

		expected = np.zeros((state.size, state.size, 8))
		# (row, col, stones): self atari of size 1 at position 0,0
		# and of size 3 at position 3,4
		for row, col, stones in ((0, 0, 1), (3, 4, 3)):
			# plane k marks a self atari of k + 1 stones
			expected[row, col, stones - 1] = 1

		self.assertTrue(np.all(planes == expected))
	def test_get_sensibleness(self):
		"""The "sensibleness" plane marks legal moves that are not white eyes."""
		# TODO - there are no legal eyes at the moment

		state = simple_board()
		preprocessor = Preprocess(["sensibleness"])
		# single-plane feature: drop the batch and plane axes, nothing to transpose
		sensible_plane = preprocessor.state_to_tensor(state)[0, 0]

		expected = np.zeros((state.size, state.size))
		for (row, col) in state.get_legal_moves():
			if state.is_eye((row, col), go.WHITE):
				# filling an eye is legal but not sensible
				continue
			expected[row, col] = 1
		self.assertTrue(np.all(expected == sensible_plane))
示例#14
0
    def test_get_self_atari_size(self):
        """Check the "self_atari_size" planes on a board with two self-atari moves."""
        state = self_atari_board()
        preprocessor = Preprocess(["self_atari_size"], size=7)
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        side = state.get_size()
        expected = np.zeros((side, side, 8))
        # (row, col, stones): self atari of size 1 at position 0,0
        # and of size 3 at position 3,4
        for row, col, stones in ((0, 0, 1), (3, 4, 3)):
            # plane k marks a self atari of k + 1 stones
            expected[row, col, stones - 1] = 1

        self.assertTrue(np.all(planes == expected))
    def test_get_sensibleness(self):
        """The "sensibleness" plane marks legal moves that are not white eyes."""
        # TODO - there are no legal eyes at the moment

        state = simple_board()
        preprocessor = Preprocess(["sensibleness"])
        # single-plane feature: drop the batch and plane axes, nothing to transpose
        sensible_plane = preprocessor.state_to_tensor(state)[0, 0]

        expected = np.zeros((state.size, state.size))
        for (row, col) in state.get_legal_moves():
            if state.is_eye((row, col), go.WHITE):
                # filling an eye is legal but not sensible
                continue
            expected[row, col] = 1
        self.assertTrue(np.all(expected == sensible_plane))
示例#16
0
    def test_get_self_atari_size_cap(self):
        """Check the "self_atari_size" planes on the capture board."""
        state = capture_board()
        preprocessor = Preprocess(["self_atari_size"], size=7)
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        side = state.get_size()
        expected = np.zeros((side, side, 8))
        # (row, col, stones): self atari of size 1 at the ko position and just
        # below it, and of size 3 at the bottom corner
        for row, col, stones in ((4, 5, 1), (3, 6, 1), (6, 6, 3)):
            # plane k marks a self atari of k + 1 stones
            expected[row, col, stones - 1] = 1

        self.assertTrue(np.all(planes == expected))
	def test_get_self_atari_size_cap(self):
		"""Check the "self_atari_size" planes on the capture board."""
		state = capture_board()
		preprocessor = Preprocess(["self_atari_size"])
		planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

		expected = np.zeros((state.size, state.size, 8))
		# (row, col, stones): self atari of size 1 at the ko position and just
		# below it, and of size 3 at the bottom corner
		for row, col, stones in ((4, 5, 1), (3, 6, 1), (6, 6, 3)):
			# plane k marks a self atari of k + 1 stones
			expected[row, col, stones - 1] = 1

		self.assertTrue(np.all(planes == expected))
示例#18
0
    def test_get_ladder_capture(self):
        """Only the point marked 'a' may be flagged by the "ladder_capture" feature."""
        diagram = (". . . . . . .|"
                   "B W a . . . .|"
                   ". B . . . . .|"
                   ". . . . . . .|"
                   ". . . . . . .|"
                   ". . . . . W .|")
        state, marked = parseboard.parse(diagram)
        preprocessor = Preprocess(["ladder_capture"], size=7)
        # single-plane feature: drop the batch and plane axes, nothing to transpose
        ladder_plane = preprocessor.state_to_tensor(state)[0, 0]

        side = state.get_size()
        expected = np.zeros((side, side))
        expected[marked['a']] = 1

        self.assertTrue(np.all(expected == ladder_plane))
示例#19
0
    def test_get_capture_size(self):
        """With nothing to capture, every legal move belongs to the zero-capture plane."""
        # TODO - at the moment there is no imminent capture
        state = simple_board()
        preprocessor = Preprocess(["capture_size"])
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        expected = np.zeros((state.size, state.size, 8))
        # plane 0 stands for "captures zero stones"; all other planes stay empty
        for (row, col) in state.get_legal_moves():
            expected[row, col, 0] = 1

        # compare plane by plane so a failure names the offending capture size
        for captured in range(8):
            actual_plane = planes[:, :, captured]
            expected_plane = expected[:, :, captured]
            self.assertTrue(
                np.all(actual_plane == expected_plane),
                "bad expectation: capturing %d stones" % captured)
示例#20
0
	def test_get_capture_size(self):
		"""With nothing to capture, every legal move belongs to the zero-capture plane."""
		# TODO - at the moment there is no imminent capture
		state = simple_board()
		preprocessor = Preprocess(["capture_size"])
		planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

		expected = np.zeros((state.size, state.size, 8))
		# plane 0 stands for "captures zero stones"; all other planes stay empty
		for (row, col) in state.get_legal_moves():
			expected[row, col, 0] = 1

		# compare plane by plane so a failure names the offending capture size
		for captured in range(8):
			actual_plane = planes[:, :, captured]
			expected_plane = expected[:, :, captured]
			self.assertTrue(
				np.all(actual_plane == expected_plane),
				"bad expectation: capturing %d stones" % captured)
示例#21
0
    def test_get_ladder_escape(self):
        """Only the point marked 'a' may be flagged by the "ladder_escape" feature."""
        # On this board, playing at 'a' is ladder escape because there is a breaker on the right.
        diagram = (". B B . . . .|"
                   "B W a . . . .|"
                   ". B . . . . .|"
                   ". . . . . W .|"
                   ". . . . . . .|"
                   ". . . . . . .|")
        state, marked = parseboard.parse(diagram)
        preprocessor = Preprocess(["ladder_escape"], size=7)
        # the feature is evaluated with white to move
        state.set_current_player(go.WHITE)
        # single-plane feature: drop the batch and plane axes, nothing to transpose
        escape_plane = preprocessor.state_to_tensor(state)[0, 0]

        side = state.get_size()
        expected = np.zeros((side, side))
        expected[marked['a']] = 1

        self.assertTrue(np.all(expected == escape_plane))
示例#22
0
	def test_get_turns_since(self):
		"""Check the "turns_since" planes against the state's move history.

		Plane k (k < 7) marks the move played k turns ago; plane 7 collects
		every older move.
		"""
		gs = simple_board()
		pp = Preprocess(["turns_since"])
		feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0))

		one_hot_turns = np.zeros((gs.size, gs.size, 8))
		rev_history = gs.history[::-1]
		# A single pass over the reversed history: the seven most recent moves
		# get a plane each and everything older shares plane 7. Clamping with
		# min() instead of indexing rev_history[0..6] directly keeps the test
		# from raising IndexError when fewer than seven moves were played.
		for age, move in enumerate(rev_history):
			one_hot_turns[move[0], move[1], min(age, 7)] = 1

		self.assertTrue(np.all(feature == one_hot_turns))
    def test_get_turns_since(self):
        """Check the "turns_since" planes for every stone currently on the board."""
        state = simple_board()
        preprocessor = Preprocess(["turns_since"])
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        expected = np.zeros((state.size, state.size, 8))

        # newest move first, so a move's list index is its age in turns
        newest_first = state.history[::-1]

        for row in range(state.size):
            for col in range(state.size):
                if state.board[row, col] == go.EMPTY:
                    continue
                # index() returns the first match, i.e. the most recent move here
                turns_ago = newest_first.index((row, col))
                # ages of 7 and more all share the last plane
                expected[row, col, min(turns_ago, 7)] = 1

        self.assertTrue(np.all(planes == expected))
	def test_get_turns_since(self):
		"""Check the "turns_since" planes for every stone currently on the board."""
		state = simple_board()
		preprocessor = Preprocess(["turns_since"])
		planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

		expected = np.zeros((state.size, state.size, 8))

		# newest move first, so a move's list index is its age in turns
		newest_first = state.history[::-1]

		for row in range(state.size):
			for col in range(state.size):
				if state.board[row, col] == go.EMPTY:
					continue
				# index() returns the first match, i.e. the most recent move here
				turns_ago = newest_first.index((row, col))
				# ages of 7 and more all share the last plane
				expected[row, col, min(turns_ago, 7)] = 1

		self.assertTrue(np.all(planes == expected))
示例#25
0
	def __init__(self, feature_list, **kwargs):
		"""Set up the preprocessor for feature_list and build the network.

		The remaining keyword arguments are forwarded to
		CNNPolicy.create_network(); "input_dim" is always set to the
		preprocessor's output_dim.
		"""
		self.preprocessor = Preprocess(feature_list)
		# the network's input depth has to equal what the preprocessor emits
		network_args = dict(kwargs, input_dim=self.preprocessor.output_dim)
		self.model = CNNPolicy.create_network(**network_args)
		self.forward = self._model_forward()
示例#26
0
    def __init__(self, feature_list, **kwargs):
        """Set up the preprocessor for feature_list and, unless told otherwise, the network.

        Keyword arguments are passed on to the subclass' create_network().
        Two of them are also read here:

        - board: board size handed to the preprocessor (default 19)
        - init_network: if False, self.model and self.forward are not created
          and the caller is expected to set them (default True)
        """
        board_size = kwargs.get("board", 19)
        self.preprocessor = Preprocess(feature_list, size=board_size)
        # the network's input depth has to equal what the preprocessor emits
        kwargs["input_dim"] = self.preprocessor.get_output_dimension()

        if not kwargs.get('init_network', True):
            return

        # going through self.__class__ means subclasses only have to
        # override create_network()
        self.model = self.__class__.create_network(**kwargs)
        # self.forward wraps a Keras function (see _model_forward)
        self.forward = self._model_forward()
    def test_get_capture_size(self):
        """Check the "capture_size" planes by simulating every legal move on the capture board."""
        state = capture_board()
        preprocessor = Preprocess(["capture_size"])
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        prisoners_before = state.num_white_prisoners
        expected = np.zeros((state.size, state.size, 8))
        # play each legal move on a copy and count the prisoners it adds;
        # captures of 7 or more stones all share the last plane
        for (row, col) in state.get_legal_moves():
            after_move = state.copy()
            after_move.do_move((row, col))
            captured = after_move.num_white_prisoners - prisoners_before
            expected[row, col, min(7, captured)] = 1

        # compare plane by plane so a failure names the offending capture size
        for plane in range(8):
            actual_plane = planes[:, :, plane]
            expected_plane = expected[:, :, plane]
            self.assertTrue(
                np.all(actual_plane == expected_plane),
                "bad expectation: capturing %d stones" % plane)
	def test_get_capture_size(self):
		"""Check the "capture_size" planes by simulating every legal move on the capture board."""
		state = capture_board()
		preprocessor = Preprocess(["capture_size"])
		planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

		prisoners_before = state.num_white_prisoners
		expected = np.zeros((state.size, state.size, 8))
		# play each legal move on a copy and count the prisoners it adds;
		# captures of 7 or more stones all share the last plane
		for (row, col) in state.get_legal_moves():
			after_move = state.copy()
			after_move.do_move((row, col))
			captured = after_move.num_white_prisoners - prisoners_before
			expected[row, col, min(7, captured)] = 1

		# compare plane by plane so a failure names the offending capture size
		for plane in range(8):
			actual_plane = planes[:, :, plane]
			expected_plane = expected[:, :, plane]
			self.assertTrue(
				np.all(actual_plane == expected_plane),
				"bad expectation: capturing %d stones" % plane)
	def test_get_liberties_after_cap(self):
		"""Same check as test_get_liberties_after, on a board where captures are imminent."""
		state = capture_board()
		preprocessor = Preprocess(["liberties_after"])
		planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

		expected = np.zeros((state.size, state.size, 8))

		# play each legal move on a copy and read the liberty count at the
		# played point; plane k means k + 1 liberties, capped at the last plane
		for (row, col) in state.get_legal_moves():
			after_move = state.copy()
			after_move.do_move((row, col))
			liberties = after_move.liberty_counts[row, col]
			expected[row, col, min(liberties - 1, 7)] = 1

		for plane in range(8):
			actual_plane = planes[:, :, plane]
			expected_plane = expected[:, :, plane]
			self.assertTrue(
				np.all(actual_plane == expected_plane),
				"bad expectation: stones with %d liberties after move" % (plane + 1))
    def test_get_liberties_after_cap(self):
        """Same check as test_get_liberties_after, on a board where captures are imminent."""
        state = capture_board()
        preprocessor = Preprocess(["liberties_after"])
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        expected = np.zeros((state.size, state.size, 8))

        # play each legal move on a copy and read the liberty count at the
        # played point; plane k means k + 1 liberties, capped at the last plane
        for (row, col) in state.get_legal_moves():
            after_move = state.copy()
            after_move.do_move((row, col))
            liberties = after_move.liberty_counts[row, col]
            expected[row, col, min(liberties - 1, 7)] = 1

        for plane in range(8):
            actual_plane = planes[:, :, plane]
            expected_plane = expected[:, :, plane]
            self.assertTrue(
                np.all(actual_plane == expected_plane),
                "bad expectation: stones with %d liberties after move" %
                (plane + 1))
	def test_get_liberties_after(self):
		"""Check the "liberties_after" planes by simulating every legal move on the simple board.

		Plane k marks moves whose stone ends up with k + 1 liberties; the last
		plane (index 7) collects 8 or more.
		"""
		gs = simple_board()
		pp = Preprocess(["liberties_after"])
		feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0))

		one_hot_liberties = np.zeros((gs.size, gs.size, 8))

		# TODO (?) hand-code?
		for (x, y) in gs.get_legal_moves():
			# play the move on a copy and read the liberties at the played point
			after_move = gs.copy()
			after_move.do_move((x, y))
			libs = after_move.liberty_counts[x, y]
			# Same bucketing as test_get_liberties_after_cap. The previous
			# "libs < 7" branch sent exactly 7 liberties to plane 7 instead
			# of plane 6.
			one_hot_liberties[x, y, min(libs - 1, 7)] = 1

		for i in range(8):
			self.assertTrue(
				np.all(feature[:, :, i] == one_hot_liberties[:, :, i]),
				"bad expectation: stones with %d liberties after move" % (i + 1))
    def test_get_liberties_after(self):
        """Check the "liberties_after" planes by simulating every legal move on the simple board.

        Plane k marks moves whose stone ends up with k + 1 liberties; the last
        plane (index 7) collects 8 or more.
        """
        gs = simple_board()
        pp = Preprocess(["liberties_after"])
        feature = pp.state_to_tensor(gs)[0].transpose((1, 2, 0))

        one_hot_liberties = np.zeros((gs.size, gs.size, 8))

        # TODO (?) hand-code?
        for (x, y) in gs.get_legal_moves():
            # play the move on a copy and read the liberties at the played point
            after_move = gs.copy()
            after_move.do_move((x, y))
            libs = after_move.liberty_counts[x, y]
            # Same bucketing as test_get_liberties_after_cap. The previous
            # "libs < 7" branch sent exactly 7 liberties to plane 7 instead
            # of plane 6.
            one_hot_liberties[x, y, min(libs - 1, 7)] = 1

        for i in range(8):
            self.assertTrue(
                np.all(feature[:, :, i] == one_hot_liberties[:, :, i]),
                "bad expectation: stones with %d liberties after move" %
                (i + 1))
	def test_feature_concatenation(self):
		"""Several features requested together must be stacked in the order given."""
		state = simple_board()
		preprocessor = Preprocess(["board", "sensibleness", "capture_size"])
		planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

		# 3 board planes + 1 sensibleness plane + 8 capture-size planes
		expected = np.zeros((state.size, state.size, 3 + 1 + 8))

		# first three planes: board, in the order white, black, empty
		for plane, colour in enumerate((go.WHITE, go.BLACK, go.EMPTY)):
			expected[:, :, plane] = (state.board == colour) * 1

		# 4th plane: sensibleness (as in test_get_sensibleness)
		for (row, col) in state.get_legal_moves():
			if state.is_eye((row, col), go.WHITE):
				continue
			expected[row, col, 3] = 1

		# 5th through 12th plane: capture size. Nothing can be captured, so
		# only the zero-capture plane (index 4) is populated
		for (row, col) in state.get_legal_moves():
			expected[row, col, 4] = 1

		self.assertTrue(np.all(expected == planes))
    def test_feature_concatenation(self):
        """Several features requested together must be stacked in the order given."""
        state = simple_board()
        preprocessor = Preprocess(["board", "sensibleness", "capture_size"])
        planes = preprocessor.state_to_tensor(state)[0].transpose((1, 2, 0))

        # 3 board planes + 1 sensibleness plane + 8 capture-size planes
        expected = np.zeros((state.size, state.size, 3 + 1 + 8))

        # first three planes: board, in the order white, black, empty
        for plane, colour in enumerate((go.WHITE, go.BLACK, go.EMPTY)):
            expected[:, :, plane] = (state.board == colour) * 1

        # 4th plane: sensibleness (as in test_get_sensibleness)
        for (row, col) in state.get_legal_moves():
            if state.is_eye((row, col), go.WHITE):
                continue
            expected[row, col, 3] = 1

        # 5th through 12th plane: capture size. Nothing can be captured, so
        # only the zero-capture plane (index 4) is populated
        for (row, col) in state.get_legal_moves():
            expected[row, col, 4] = 1

        self.assertTrue(np.all(expected == planes))
示例#35
0
    def test_two_escapes(self):
        """Both 'a' and 'b' must be flagged as ladder escapes for white once white has played 'c'."""
        gs, moves = parseboard.parse(". . X . . .|"
                                     ". X O a . .|"
                                     ". X c X . .|"
                                     ". O X b . .|"
                                     ". . O . . .|"
                                     ". . . . . .|")

        # place a white stone at c, and reset player to white so that the
        # feature is evaluated from white's point of view
        gs.do_move(moves['c'], color=go.WHITE)
        gs.set_current_player(go.WHITE)

        # (a second, redundant set_current_player(go.WHITE) call used to sit
        # here; nothing between the two calls changes the player)
        pp = Preprocess(["ladder_escape"], size=6)
        feature = pp.state_to_tensor(gs)[0, 0]  # single plane; no need to transpose

        # both 'a' and 'b' should be considered escape moves for white after 'O' at c
        expectation = np.zeros((gs.get_size(), gs.get_size()))
        expectation[moves['a']] = 1
        expectation[moves['b']] = 1

        self.assertTrue(np.all(expectation == feature))
示例#36
0
def is_ladder_capture(state, move):
    """Tell whether `move` is flagged by the "ladder_capture" feature for `state`.

    A preprocessor sized to the state's board computes the feature; the
    result is the comparison of the squeezed tensor's entry at `move` with 1.
    """
    board_size = state.get_size()
    preprocessor = Preprocess(["ladder_capture"], size=board_size)
    # squeeze() drops the length-1 axes so the tensor can be indexed by the move
    ladder_plane = preprocessor.state_to_tensor(state).squeeze()
    return ladder_plane[move] == 1
示例#37
0
class CNNPolicy(object):
	"""Go policy backed by a convolutional neural network.

	A game state is converted to feature planes by a Preprocess object and
	pushed through the network, whose output is read at the legal moves.
	"""

	def __init__(self, feature_list, **kwargs):
		"""Set up the preprocessor for feature_list and build the network.

		The remaining keyword arguments are forwarded to create_network();
		"input_dim" is always set to the preprocessor's output_dim.
		"""
		self.preprocessor = Preprocess(feature_list)
		# the network's input depth has to equal what the preprocessor emits
		network_args = dict(kwargs, input_dim=self.preprocessor.output_dim)
		self.model = CNNPolicy.create_network(**network_args)
		self.forward = self._model_forward()

	def _model_forward(self):
		"""Build a forward-pass-only callable from the current keras backend.

		Unlike model.compile(), no loss function or training method is
		involved: the returned callable takes a batch of inputs and returns
		the network output.

		c.f. https://github.com/fchollet/keras/issues/1426
		"""
		net_input = self.model.get_input(train=False)
		net_output = self.model.get_output(train=False)
		run_network = K.function([net_input], [net_output])

		def forward(inpt):
			# run_network returns a list of tensors; the first [0] takes the
			# only one. That tensor has dimensions (1, width, height), so the
			# second [0] strips the leading axis and leaves (width, height).
			return run_network(inpt)[0][0]

		return forward

	def batch_eval_state(self, state_gen, batch=16):
		"""Evaluate a stream of states in batches to make best use of the GPU.

		Not implemented yet.

		Returns: TBD (stream of results? that would break zip().
			streaming pairs of pre-zipped (state, result)?)
		"""
		raise NotImplementedError()

	def eval_state(self, state):
		"""Return a list of ((x, y), probability) pairs for the legal moves of state.

		The probabilities are the raw network outputs at the legal moves, so
		they need not sum to one (illegal moves are simply left out).
		"""
		tensor = self.preprocessor.state_to_tensor(state)

		# run the tensor through the network
		activations = self.forward([tensor])

		# read the activations at the legal move locations only
		scored_moves = []
		for (x, y) in state.get_legal_moves():
			scored_moves.append(((x, y), activations[x, y]))
		return scored_moves

	@staticmethod
	def create_network(**kwargs):
		"""Assemble the convolutional network.

		Keyword Arguments:
		- input_dim:         depth of the features fed to the first layer (no default)
		- board:             width of the go board to be processed (default 19)
		- filters_per_layer: number of filters used on every layer (default 128)
		- layers:            number of convolutional layers in front of the final
		                     1x1 layer (default 12)
		- filter_width_K:    (where K is between 1 and <layers>) width of the filter
		                     on layer K (default 3, except the 1st layer which
		                     defaults to 5). Must be odd.
		"""
		# start from the defaults and let the caller's arguments override them
		params = {
			"board": 19,
			"filters_per_layer": 128,
			"layers": 12,
			"filter_width_1": 5
		}
		params.update(kwargs)

		# The network is a stack of 'same'-padded convolutions, so every
		# layer's output stays board x board.
		network = Sequential()

		# first layer: the only one that has to be told the input shape
		first_width = params["filter_width_1"]
		network.add(convolutional.Convolution2D(
			input_shape=(params["input_dim"], params["board"], params["board"]),
			nb_filter=params["filters_per_layer"],
			nb_row=first_width,
			nb_col=first_width,
			init='uniform',
			activation='relu',
			border_mode='same'))

		# layers 2..<layers>: width taken from filter_width_K when given, else 3
		for layer_number in range(2, params["layers"] + 1):
			width = params.get("filter_width_%d" % layer_number, 3)
			network.add(convolutional.Convolution2D(
				nb_filter=params["filters_per_layer"],
				nb_row=width,
				nb_col=width,
				init='uniform',
				activation='relu',
				border_mode='same'))

		# the last layer maps each position's <filters_per_layer> features to one number
		network.add(convolutional.Convolution2D(
			nb_filter=1,
			nb_row=1,
			nb_col=1,
			init='uniform',
			border_mode='same'))
		# reshape output to be board x board
		network.add(Reshape((params["board"], params["board"])))
		# softmax turns the activations into probabilities
		network.add(Activation('softmax'))

		return network

	@staticmethod
	def load_model(json_file):
		"""Create a new CNNPolicy from the specification stored in json_file.

		The file is expected to have the layout written by save_model().
		"""
		with open(json_file, 'r') as spec_file:
			specs = json.load(spec_file)
		policy = CNNPolicy(specs['feature_list'])
		# swap the freshly built default network for the stored architecture
		policy.model = model_from_json(specs['keras_model'])
		policy.forward = policy._model_forward()
		return policy

	def save_model(self, json_file):
		"""Write the network architecture and the feature list to json_file."""
		# A CNNPolicy has 2 moving parts - the feature preprocessing and the
		# neural net - and each gets a top-level entry in the saved file.
		# Keras happens to serialize models as a JSON string too, so that
		# string ends up nested inside our own JSON object. It looks odd but
		# keeps load_model simple.
		specs = {}
		specs['keras_model'] = self.model.to_json()
		specs['feature_list'] = self.preprocessor.feature_list
		# use the json module to write the specification to file
		with open(json_file, 'w') as spec_file:
			json.dump(specs, spec_file)
def play_batch(player_RL, player_SL, batch_size, features, i_rand_move,
               next_idx, sgf_path):
    """Play a batch of games in parallel and return one training pair from each game.

    As described in Silver et al, the method for generating value net training data is as follows:

    * pick a number between 1 and 450
    * use the supervised-learning policy to play a game against itself up to that number of moves.
    * now go off-policy and pick a totally random move
    * play out the rest of the game with the reinforcement-learning policy
    * save the state that occurred *right after* the random move,
    * and the end result of the game, as the training pair

    Arguments:
    - player_RL : player whose get_moves(states) chooses every move after the random one
    - player_SL : player whose get_moves(states) chooses the first i_rand_move - 1 moves
    - batch_size : number of games that are started
    - features : feature list handed to Preprocess
    - i_rand_move : move number at which the uniform-random move is played
    - next_idx : number used in the name of the first saved SGF file
    - sgf_path : directory passed to save_gamestate_to_sgf, or None to skip saving

    Returns:
    (training_states, winners). training_states is the concatenation (axis 0)
    of the preprocessed states taken right after the random move, winners is
    an array of shape (n_games, 1) holding WIN or LOSE from the point of view
    of the player who made the random move. Games that are already over
    before the random move are dropped, so n_games may be smaller than
    batch_size. If no game is left at that point, (None, None) is returned.
    """
    def do_move(states, moves):
        """Apply one move to each state, skipping games that have ended."""
        for st, mv in zip(states, moves):
            if not st.is_end_of_game():
                # Only do more moves if not end of game already
                st.do_move(mv)
        return states

    def do_rand_move(states):
        """Do a uniform-random move over legal moves and record info for
           training. Only gets called once per game.
        """

        # get legal moves and play one at random
        legal_moves = [st.get_legal_moves() for st in states]
        rand_moves = [lm[np.random.choice(len(lm))] for lm in legal_moves]
        states = do_move(states, rand_moves)

        # copy all states, these are the generated training data
        # (kept as game states for later 1hot preprocessing)
        training_state_list = [st.copy() for st in states]
        return training_state_list, states

    def convert(state_list, preprocessor):
        """Convert states to 1-hot and concatenate. X's are game state objects.
        """
        return np.concatenate(
            [preprocessor.state_to_tensor(state) for state in state_list],
            axis=0)

    preprocessor = Preprocess(features)
    states = [GameState() for _ in xrange(batch_size)]

    # play player_SL moves (both colors are played by player_SL)
    for _ in xrange(i_rand_move - 1):
        batch_moves = player_SL.get_moves(states)
        states = do_move(states, batch_moves)

    # remove games that are finished
    states = [state for state in states if not state.is_end_of_game()]

    if not states:
        # Every game ended before the random move: there is nothing to sample.
        # Without this guard np.concatenate would raise on an empty list.
        return None, None

    # Make random move
    states_list, states = do_rand_move(states)

    # color is random move player color
    color = WHITE if i_rand_move % 2 == 0 else BLACK

    # play moves with player_RL till game ends
    while True:
        # Get moves (batch) for whichever color is to move
        batch_moves = player_RL.get_moves(states)
        states = do_move(states, batch_moves)

        # check if all games are finished
        if all(st.is_end_of_game() for st in states):
            break

    if sgf_path is not None:
        random_player = 'WHITE' if color == WHITE else 'BLACK'

        # files are numbered consecutively starting at next_idx
        for sgf_id, gm in enumerate(states, next_idx):
            # determine winner
            winner_game = 'WHITE' if gm.get_winner_color() == WHITE else 'BLACK'

            # generate file name: id padded with leading '0' to 10 characters
            file_name = str(sgf_id).rjust(10, '0') + \
                '_winner_' + winner_game + '_active-player_' + \
                random_player + '_move_' + str(i_rand_move) + '.sgf'
            # save sgf
            save_gamestate_to_sgf(gm,
                                  sgf_path,
                                  file_name,
                                  result=winner_game + ' ' + str(i_rand_move))

    # Concatenate training examples
    training_states = convert(states_list, preprocessor)

    # get winners list relative to 'random move' player color (color)
    # winner BLACK & color Black -> WIN
    # winner WHITE & color WHITE -> WIN
    # winner BLACK & color WHITE -> LOSE
    # winner WHITE & color Black -> LOSE
    actual_batch_size = len(states)
    winners = np.array([
        WIN if st.get_winner_color() == color else LOSE for st in states
    ]).reshape(actual_batch_size, 1)
    return training_states, winners
示例#39
0
	def __init__(self, features):
		"""Create the feature preprocessor and record its output dimension.

		features is passed unchanged to Preprocess; n_features is read from
		the resulting object's output_dim attribute.
		"""
		processor = Preprocess(features)
		self.feature_processor = processor
		self.n_features = processor.output_dim
示例#40
0
class game_converter:
	"""Converts SGF game records into feature tensors and stores them in HDF5."""

	def __init__(self, features):
		"""Create the feature preprocessor and record its output dimension.

		features is passed unchanged to Preprocess.
		"""
		self.feature_processor = Preprocess(features)
		self.n_features = self.feature_processor.output_dim

	def convert_game(self, file_name, bd_size):
		"""Read the given SGF file into an iterable of (input,output) pairs
		for neural network training

		Each input is a GameState converted into one-hot neural net features
		Each output is an action as an (x,y) pair (passes are skipped)

		If this game's size does not match bd_size, a SizeMismatchError is raised
		"""

		# the file contents are read completely while the file is open; the
		# iterator below works on that in-memory string
		with open(file_name, 'r') as file_object:
			state_action_iterator = sgf_iter_states(file_object.read(), include_end=False)

		for (state, move, player) in state_action_iterator:
			if state.size != bd_size:
				raise SizeMismatchError()
			if move != go.PASS_MOVE:
				nn_input = self.feature_processor.state_to_tensor(state)
				yield (nn_input, move)

	def sgfs_to_hdf5(self, sgf_files, hdf5_file, bd_size=19, ignore_errors=True, verbose=False):
		"""Convert all files in the iterable sgf_files into an hdf5 group to be stored in hdf5_file

		Arguments:
		- sgf_files : an iterable of relative or absolute paths to SGF files
		- hdf5_file : the name of the HDF5 where features will be saved
		- bd_size : side length of board of games that are loaded
		- ignore_errors : if True, issues a Warning when there is an unknown exception rather than halting. Note
			that sgf.ParseException and go.IllegalMove exceptions are always skipped
		- verbose : if True, prints progress information for every file

		The resulting file has the following properties:
			states  : dataset with shape (n_data, n_features, board width, board height)
			actions : dataset with shape (n_data, 2) (actions are stored as x,y tuples of where the move was played)
			file_offsets : group mapping from filenames to tuples of (index, length)

		For example, to find what positions in the dataset come from 'test.sgf':
			index, length = file_offsets['test.sgf']
			test_states = states[index:index+length]
			test_actions = actions[index:index+length]

		If conversion fails with an exception that is not skipped, the temporary
		file is closed and deleted and the exception is re-raised.
		"""
		# TODO - also save feature list

		# make a hidden temporary file in case of a crash.
		# on success, this is renamed to hdf5_file
		tmp_file = os.path.join(os.path.dirname(hdf5_file), ".tmp." + os.path.basename(hdf5_file))
		h5f = h5.File(tmp_file, 'w')

		try:
			# see http://docs.h5py.org/en/latest/high/group.html#Group.create_dataset
			states = h5f.require_dataset(
				'states',
				dtype=np.uint8,
				shape=(1, self.n_features, bd_size, bd_size),
				maxshape=(None, self.n_features, bd_size, bd_size),  # 'None' dimension allows it to grow arbitrarily
				exact=False,                                         # allow non-uint8 datasets to be loaded, coerced to uint8
				chunks=(64, self.n_features, bd_size, bd_size),      # approximately 1MB chunks
				compression="lzf")
			actions = h5f.require_dataset(
				'actions',
				dtype=np.uint8,
				shape=(1, 2),
				maxshape=(None, 2),
				exact=False,
				chunks=(1024, 2),
				compression="lzf")
			# 'file_offsets' is an HDF5 group so that 'file_name in file_offsets' is fast
			file_offsets = h5f.require_group('file_offsets')

			if verbose:
				print("created HDF5 dataset in {}".format(tmp_file))

			next_idx = 0
			for file_name in sgf_files:
				if verbose:
					print(file_name)
				# count number of state/action pairs yielded by this game
				n_pairs = 0
				file_start_idx = next_idx
				try:
					for state, move in self.convert_game(file_name, bd_size):
						# grow both datasets by one row whenever they are full
						if next_idx >= len(states):
							states.resize((next_idx + 1, self.n_features, bd_size, bd_size))
							actions.resize((next_idx + 1, 2))
						states[next_idx] = state
						actions[next_idx] = move
						n_pairs += 1
						next_idx += 1
				except go.IllegalMove:
					warnings.warn("Illegal Move encountered in %s\n\tdropping the remainder of the game" % file_name)
				except sgf.ParseException:
					warnings.warn("Could not parse %s\n\tdropping game" % file_name)
				except SizeMismatchError:
					warnings.warn("Skipping %s; wrong board size" % file_name)
				except Exception as e:
					# catch everything else
					if ignore_errors:
						warnings.warn("Unkown exception with file %s\n\t%s" % (file_name, e), stacklevel=2)
					else:
						# bare raise keeps the original traceback
						raise
				finally:
					# pairs written before an error are kept and indexed
					if n_pairs > 0:
						# '/' has special meaning in HDF5 key names, so they are replaced with ':' here
						file_name_key = file_name.replace('/', ':')
						file_offsets[file_name_key] = [file_start_idx, n_pairs]
						if verbose:
							print("\t%d state/action pairs extracted" % n_pairs)
					elif verbose:
						print("\t-no usable data-")
		except Exception:
			print("sgfs_to_hdf5 failed")
			# release the HDF5 handle before deleting the file; otherwise the
			# handle leaks and the delete can fail on platforms that lock open files
			h5f.close()
			os.remove(tmp_file)
			raise

		if verbose:
			print("finished. renaming %s to %s" % (tmp_file, hdf5_file))

		# processing complete; rename tmp_file to hdf5_file
		h5f.close()
		os.rename(tmp_file, hdf5_file)
示例#41
0
def make_training_pairs(player, opp, features, mini_batch_size, board_size=19):
    """Make training pairs for batch of matches, utilizing player.get_moves (parallel form of
    player.get_move), which calls `CNNPolicy.batch_eval_state`.

    Args:
    player -- player that we're always updating
    opp -- batch opponent
    features -- game features to be one-hot encoded
    mini_batch_size -- number of games in mini-batch
    board_size -- size passed to GameState for every game in the batch

    Return:
    X_list -- list of 1-hot board states associated with moves.
    y_list -- list of 1-hot moves associated with board states.
    winners -- list of winners associated with each game in batch

    Only non-pass moves made by `player` are recorded; each recorded state is
    the position `player` saw immediately before making that move.
    """
    def do_move(states, states_prev, moves, X_list, y_list, player_color):
        """Play one move per game; record (state, move) pairs for `player`."""
        bsize_flat = bsize * bsize
        for st, st_prev, mv, X, y in zip(states, states_prev, moves, X_list,
                                         y_list):
            # NOTE(review): is_end_of_game is read as an attribute here; if
            # GameState exposes it as a method this test is always False - confirm
            if not st.is_end_of_game:
                # Only do more moves if not end of game already
                st.do_move(mv)
                # after the move the turn has passed on, so a current player
                # different from player_color means `player` just moved
                if st.current_player != player_color and mv is not go.PASS_MOVE:
                    # Convert move to one-hot
                    state_1hot = preprocessor.state_to_tensor(st_prev)
                    move_1hot = np.zeros(bsize_flat)
                    move_1hot[flatten_idx(mv, bsize)] = 1
                    X.append(state_1hot)
                    y.append(move_1hot)
        return states, X_list, y_list

    # Lists of game training pairs (1-hot)
    X_list = [list() for _ in xrange(mini_batch_size)]
    y_list = [list() for _ in xrange(mini_batch_size)]
    preprocessor = Preprocess(features)
    # board side length as read from the last axis of the model's input shape
    bsize = player.policy.model.input_shape[-1]
    states = [GameState(size=board_size) for i in xrange(mini_batch_size)]
    # Randomly choose who goes first (i.e. color of 'player')
    player_color = np.random.choice([go.BLACK, go.WHITE])
    player1, player2 = (player, opp) if player_color == go.BLACK else \
        (opp, player)
    while True:
        # Cache states before black's moves
        states_prev = [st.copy() for st in states]
        # Get moves (batch)
        moves_black = player1.get_moves(states)
        # Do moves (black)
        states, X_list, y_list = do_move(states, states_prev, moves_black,
                                         X_list, y_list, player_color)
        # Cache again before white's moves. Reusing the snapshot taken before
        # black moved would pair white's move with a position that is one
        # move out of date.
        states_prev = [st.copy() for st in states]
        # Do moves (white)
        moves_white = player2.get_moves(states)
        states, X_list, y_list = do_move(states, states_prev, moves_white,
                                         X_list, y_list, player_color)
        # If all games have ended, we're done. Get winners.
        done = [st.is_end_of_game for st in states]
        if all(done):
            break
    winners = [st.get_winner() for st in states]
    # Concatenate tensors across turns within each game
    for i in xrange(mini_batch_size):
        X_list[i] = np.concatenate(X_list[i], axis=0)
        y_list[i] = np.vstack(y_list[i])
    return X_list, y_list, winners
def run_training(cmd_line_args=None):
    """Run supervised training of a policy network.

    cmd_line_args is an optional list of command-line arguments; when it is
    None the arguments are taken from sys.argv by argparse.

    The function loads the model spec and the HDF5 training data, checks that
    the dataset's features (or at least its number of planes) match the model,
    splits the shuffled data into training and validation parts according to
    --train-val-test, and calls model.fit_generator. Weights are checkpointed
    every epoch and metadata is written to metadata.json in the output
    directory. Raises ValueError when model and dataset features disagree.
    """
    import argparse
    parser = argparse.ArgumentParser(description='Perform supervised training on a policy network.')
    # required args
    parser.add_argument("model", help="Path to a JSON model file (i.e. from CNNPolicy.save_model())")  # noqa: E501
    parser.add_argument("train_data", help="A .h5 file of training data")
    parser.add_argument("out_directory", help="directory where metadata and weights will be saved")
    # frequently used args
    parser.add_argument("--minibatch", "-B", help="Size of training data minibatches. Default: 16", type=int, default=16)  # noqa: E501
    parser.add_argument("--epochs", "-E", help="Total number of iterations on the data. Default: 10", type=int, default=10)  # noqa: E501
    parser.add_argument("--epoch-length", "-l", help="Number of training examples considered 'one epoch'. Default: # training data", type=int, default=None)  # noqa: E501
    parser.add_argument("--learning-rate", "-r", help="Learning rate - how quickly the model learns at first. Default: .03", type=float, default=.03)  # noqa: E501
    parser.add_argument("--decay", "-d", help="The rate at which learning decreases. Default: .0001", type=float, default=.0001)  # noqa: E501
    parser.add_argument("--verbose", "-v", help="Turn on verbose mode", default=False, action="store_true")  # noqa: E501
    # slightly fancier args
    parser.add_argument("--weights", help="Name of a .h5 weights file (in the output directory) to load to resume training", default=None)  # noqa: E501
    parser.add_argument("--train-val-test", help="Fraction of data to use for training/val/test. Must sum to 1. Invalid if restarting training", nargs=3, type=float, default=[0.93, .05, .02])  # noqa: E501
    parser.add_argument("--symmetries", help="Comma-separated list of transforms, subset of noop,rot90,rot180,rot270,fliplr,flipud,diag1,diag2", default='noop,rot90,rot180,rot270,fliplr,flipud,diag1,diag2')  # noqa: E501
    # TODO - an argument to specify which transformations to use, put it in metadata

    if cmd_line_args is None:
        args = parser.parse_args()
    else:
        args = parser.parse_args(cmd_line_args)

    # TODO - what follows here should be refactored into a series of small functions

    resume = args.weights is not None

    if args.verbose:
        if resume:
            print("trying to resume from %s with weights %s" %
                  (args.out_directory, os.path.join(args.out_directory, args.weights)))
        else:
            if os.path.exists(args.out_directory):
                print("directory %s exists. any previous data will be overwritten" %
                      args.out_directory)
            else:
                print("starting fresh output directory %s" % args.out_directory)

    # load model from json spec
    policy = CNNPolicy.load_model(args.model)
    model_features = policy.preprocessor.feature_list
    model = policy.model
    if resume:
        model.load_weights(os.path.join(args.out_directory, args.weights))

    # features of training data
    # the training data is only read, so open it read-only; this also avoids
    # silently creating an empty file when the path is wrong
    dataset = h5.File(args.train_data, 'r')

    # Verify that dataset's features match the model's expected features.
    if 'features' in dataset:
        dataset_features = dataset['features'][()]
        dataset_features = dataset_features.split(",")
        if len(dataset_features) != len(model_features) or \
           any(df != mf for (df, mf) in zip(dataset_features, model_features)):
            raise ValueError("Model JSON file expects features \n\t%s\n"
                             "But dataset contains \n\t%s" % ("\n\t".join(model_features),
                                                              "\n\t".join(dataset_features)))
        elif args.verbose:
            print("Verified that dataset features and model features exactly match.")
    else:
        # Cannot check each feature, but can check number of planes.
        n_dataset_planes = dataset["states"].shape[1]
        tmp_preprocess = Preprocess(model_features)
        n_model_planes = tmp_preprocess.output_dim
        if n_dataset_planes != n_model_planes:
            raise ValueError("Model JSON file expects a total of %d planes from features \n\t%s\n"
                             "But dataset contains %d planes" % (n_model_planes,
                                                                 "\n\t".join(model_features),
                                                                 n_dataset_planes))
        elif args.verbose:
            print("Verified agreement of number of model and dataset feature planes, but cannot "
                  "verify exact match using old dataset format.")

    n_total_data = len(dataset["states"])
    n_train_data = int(args.train_val_test[0] * n_total_data)
    # Need to make sure training data is divisible by minibatch size or get
    # warning mentioning accuracy from keras
    n_train_data = n_train_data - (n_train_data % args.minibatch)
    # honor the requested validation fraction; whatever is left after the
    # training and validation parts is the (currently unused) test part
    n_val_data = int(args.train_val_test[1] * n_total_data)
    # n_test_data = n_total_data - (n_train_data + n_val_data)

    if args.verbose:
        print("datset loaded")
        print("\t%d total samples" % n_total_data)
        print("\t%d training samples" % n_train_data)
        print("\t%d validaion samples" % n_val_data)

    # ensure output directory is available
    if not os.path.exists(args.out_directory):
        os.makedirs(args.out_directory)

    # create metadata file and the callback object that will write to it
    meta_file = os.path.join(args.out_directory, "metadata.json")
    meta_writer = MetadataWriterCallback(meta_file)
    # load prior data if it already exists
    if os.path.exists(meta_file) and resume:
        with open(meta_file, "r") as f:
            meta_writer.metadata = json.load(f)
        if args.verbose:
            print("previous metadata loaded: %d epochs. new epochs will be appended." %
                  len(meta_writer.metadata["epochs"]))
    elif args.verbose:
        print("starting with empty metadata")
    # the MetadataWriterCallback only sets 'epoch' and 'best_epoch'. We can add
    # in anything else we like here
    #
    # TODO - model and train_data are saved in meta_file; check that they match
    # (and make args optional when restarting?)
    meta_writer.metadata["training_data"] = args.train_data
    meta_writer.metadata["model_file"] = args.model
    # Record all command line args in a list so that all args are recorded even
    # when training is stopped and resumed.
    meta_writer.metadata["cmd_line_args"] = meta_writer.metadata.get("cmd_line_args", [])
    meta_writer.metadata["cmd_line_args"].append(vars(args))

    # create ModelCheckpoint to save weights every epoch
    checkpoint_template = os.path.join(args.out_directory, "weights.{epoch:05d}.hdf5")
    checkpointer = ModelCheckpoint(checkpoint_template)

    # load precomputed random-shuffle indices or create them
    # TODO - save each train/val/test indices separately so there's no danger of
    # changing args.train_val_test when resuming
    shuffle_file = os.path.join(args.out_directory, "shuffle.npz")
    if os.path.exists(shuffle_file) and resume:
        # np.save writes binary data, so the file must be opened in binary mode
        with open(shuffle_file, "rb") as f:
            shuffle_indices = np.load(f)
        if args.verbose:
            print("loading previous data shuffling indices")
    else:
        # create shuffled indices
        shuffle_indices = np.random.permutation(n_total_data)
        with open(shuffle_file, "wb") as f:
            np.save(f, shuffle_indices)
        if args.verbose:
            print("created new data shuffling indices")
    # training indices are the first consecutive set of shuffled indices, val
    # next, then test gets the remainder
    train_indices = shuffle_indices[0:n_train_data]
    val_indices = shuffle_indices[n_train_data:n_train_data + n_val_data]
    # test_indices = shuffle_indices[n_train_data + n_val_data:]

    symmetries = [BOARD_TRANSFORMATIONS[name] for name in args.symmetries.strip().split(",")]

    # create dataset generators
    train_data_generator = shuffled_hdf5_batch_generator(
        dataset["states"],
        dataset["actions"],
        train_indices,
        args.minibatch,
        symmetries)
    val_data_generator = shuffled_hdf5_batch_generator(
        dataset["states"],
        dataset["actions"],
        val_indices,
        args.minibatch,
        symmetries)

    sgd = SGD(lr=args.learning_rate, decay=args.decay)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=["accuracy"])

    # --epoch-length overrides the default of one pass over the training data
    samples_per_epoch = args.epoch_length or n_train_data

    if args.verbose:
        print("STARTING TRAINING")

    model.fit_generator(
        generator=train_data_generator,
        samples_per_epoch=samples_per_epoch,
        nb_epoch=args.epochs,
        callbacks=[checkpointer, meta_writer],
        validation_data=val_data_generator,
        nb_val_samples=n_val_data)
示例#43
0
class NeuralNetBase(object):
    """Common base for neural network wrappers: owns the feature preprocessor,
    builds the 'forward' function and handles saving/loading of model specs.
    """

    # registry of subclasses by class name, used by load_model to find the
    # class named in a saved file. subclasses can be 'registered' with the
    # @neuralnet decorator
    subclasses = {}

    def __init__(self, feature_list, **kwargs):
        """Set up preprocessing for feature_list and, unless disabled, the network.

        Keyword arguments are forwarded to the subclass' create_network() with
        'input_dim' set to the preprocessor's output dimension. 'board' (default
        19) is also used as the preprocessor size.

        optional argument: init_network (boolean). When False, self.model and
        self.forward are not created and the caller is expected to set them.
        """
        board_size = kwargs.get("board", 19)
        self.preprocessor = Preprocess(feature_list, size=board_size)
        kwargs["input_dim"] = self.preprocessor.get_output_dimension()

        if not kwargs.get('init_network', True):
            return

        # going through self.__class__ picks up the subclass' create_network(),
        # which is the only thing subclasses have to provide
        self.model = self.__class__.create_network(**kwargs)
        # self.forward wraps a Keras backend function
        self.forward = self._model_forward()

    def _model_forward(self):
        """Return a callable that feeds a batch of inputs forward through
        self.model and returns the network output.

        Unlike model.compile() this involves no loss function or training
        method.

        c.f. https://github.com/fchollet/keras/issues/1426
        """
        net = self.model

        if not net.uses_learning_phase:
            # no phase-dependent layers: the input tensor is the only argument
            plain_function = K.function([net.input], [net.output])
            return lambda inpt: plain_function([inpt])[0]

        # uses_learning_phase is True when the model contains layers that behave
        # differently during training and testing, e.g. Dropout or
        # BatchNormalization. K.learning_phase() is then an extra input that is
        # fed 0 here to run the network in prediction mode.
        phased_function = K.function(
            [net.input, K.learning_phase()], [net.output])
        # the backend function returns a list of tensors; [0] takes the first
        return lambda inpt: phased_function([inpt, 0])[0]

    @staticmethod
    def load_model(json_file):
        """Create a neural net object from the specification stored in json_file.

        The 'class' entry of the file (default 'CNNPolicy') selects the
        registered subclass to instantiate; ValueError is raised when no such
        subclass is registered. If the file has a 'weights_file' entry, the
        weights are loaded from that path.
        """
        with open(json_file, 'r') as spec_file:
            specs = json.load(spec_file)

        # look up the class that was recorded when the model was saved
        class_name = specs.get('class', 'CNNPolicy')
        try:
            network_class = NeuralNetBase.subclasses[class_name]
        except KeyError:
            message = (
                "Unknown neural network type in json file: {}\n"
                "(was it registered with the @neuralnet decorator?)")
            raise ValueError(message.format(class_name))

        # build the object without a network, then attach the saved one
        net = network_class(specs['feature_list'], init_network=False)
        net.model = model_from_json(specs['keras_model'],
                                    custom_objects={'Bias': Bias})
        if 'weights_file' in specs:
            net.model.load_weights(specs['weights_file'])
        net.forward = net._model_forward()
        return net

    def save_model(self, json_file, weights_file=None):
        """Write the class name, network architecture and feature list to json_file.

        If a weights_file (.hdf5 extension) is given, the model weights are
        saved to it and its path is recorded in the JSON so that load_model
        reloads the weights automatically.
        """
        # The Keras model serializes itself to a JSON string, which is stored
        # as one value of the outer JSON object next to the class name and the
        # preprocessing features. Each moving part gets its own top-level entry.
        specs = {}
        specs['class'] = self.__class__.__name__
        specs['keras_model'] = self.model.to_json()
        specs['feature_list'] = self.preprocessor.get_feature_list()
        if weights_file is not None:
            self.model.save_weights(weights_file)
            specs['weights_file'] = weights_file
        with open(json_file, 'w') as out_file:
            json.dump(specs, out_file)
def generate_data(player_RL, player_SL, hdf5_file, n_training_pairs,
                  batch_size, bd_size, features, verbose, sgf_path):
    """Generate value-network training pairs and store them in hdf5_file.

    Batches of games are played with play_batch until at least
    n_training_pairs (state, winner) pairs have been written. Data is written
    to a hidden temporary file next to hdf5_file, which is renamed to
    hdf5_file once generation has finished.

    Arguments:
    - player_RL, player_SL : players handed to play_batch
    - hdf5_file : path of the resulting HDF5 file
    - n_training_pairs : minimum number of pairs to generate
    - batch_size : number of games started per batch
    - bd_size : board side length used for the stored state shape
    - features : feature list handed to Preprocess and play_batch
    - verbose : if True, print progress and the random move distribution
    - sgf_path : handed to play_batch (directory for SGF records or None)

    Any exception raised while playing or saving is propagated; the HDF5
    handle is closed first and the temporary file is left on disk.
    """
    # used features
    n_features = Preprocess(features).get_output_dimension()
    # temporary hdf5 file
    tmp_file = os.path.join(os.path.dirname(hdf5_file),
                            ".tmp." + os.path.basename(hdf5_file))
    # open hdf5 file
    h5f = h5py.File(tmp_file, 'w')

    try:
        # initialize a new hdf5 file
        h5_states, h5_winners = init_hdf5(h5f, n_features, bd_size)

        # random move distribution administration
        distribution = {key: 0 for key in range(DEAULT_RANDOM_MOVE)}

        max_value = str(n_training_pairs)
        if verbose:
            print(str(hdf5_file) + " file initialized.")

        next_idx = 0
        while True:
            # Randomly choose turn to play uniform random. Move prior will be from SL
            # policy. Moves after will be from RL policy.
            # NOTE(review): this range includes 0, for which play_batch plays
            # no SL moves (same as 1) but derives the other color - confirm
            # whether the range should start at 1
            i_rand_move = np.random.choice(range(DEAULT_RANDOM_MOVE))

            # play games
            states, winners = play_batch(player_RL, player_SL, batch_size,
                                         features, i_rand_move, next_idx, sgf_path)

            if states is not None:
                try:
                    # get actual batch size in case any pair was removed
                    actual_batch_size = len(states)
                    # increment random distribution
                    distribution[i_rand_move] += actual_batch_size

                    # add states and winners to hdf5 file
                    h5_states.resize((next_idx + actual_batch_size, n_features,
                                      bd_size, bd_size))
                    h5_winners.resize((next_idx + actual_batch_size, 1))
                    h5_states[next_idx:] = states
                    h5_winners[next_idx:] = winners

                    # count saved pairs
                    next_idx += actual_batch_size
                except Exception:
                    warnings.warn(
                        "Unknown error occured during batch save to HDF5 file: {}".
                        format(hdf5_file))  # noqa: E501
                    # bare raise keeps the original traceback
                    raise

            if verbose:
                # primitive progress indication: right-align the counter to the
                # width of the target so the line length stays constant
                line = 'Progress: ' + str(next_idx).rjust(len(max_value)) + \
                    '/' + max_value

                sys.stdout.write('\b' * len(line))
                sys.stdout.write('\r')
                sys.stdout.write(line)
                sys.stdout.flush()

            # stop data generation when at least n_trainings_pairs have been created
            if n_training_pairs <= next_idx:
                break
    finally:
        # always release the HDF5 handle, also when a batch fails
        h5f.close()

    # processing complete: rename tmp_file to hdf5_file
    os.rename(tmp_file, hdf5_file)
    if verbose:
        print("Value training data succesfull created.")

        # show random move distribution
        print("\nRandom move distribution:")
        for key in range(DEAULT_RANDOM_MOVE):
            print("Random move: " + str(key) + " " + str(distribution[key]))
示例#45
0
class CNNPolicy(object):
    """Policy backed by a convolutional neural network.

    Uses a convolutional neural network to evaluate the state of the game
    and compute a probability distribution over the next action.

    Attributes:
    - preprocessor: Preprocess instance built from the feature list.
    - model:        the network (None until assigned when the instance was
                    created with init_network=False).
    - forward:      callable running a batch of inputs through ``model``
                    (None under the same condition as ``model``).
    """

    def __init__(self, feature_list, **kwargs):
        """Create a policy object that preprocesses according to feature_list.

        Keyword Arguments:
        - init_network: when True (the default) a network is built with
                        create_network() from the remaining keyword
                        arguments. When False no network is built; ``model``
                        and ``forward`` are left as None and must be assigned
                        by the caller (this is what load_model() does).
        - all other keyword arguments are forwarded to create_network().
          "input_dim" is always overwritten with the preprocessor's
          output_dim.
        """
        # consume the flag here so it is not forwarded to create_network()
        init_network = kwargs.pop("init_network", True)

        self.preprocessor = Preprocess(feature_list)
        kwargs["input_dim"] = self.preprocessor.output_dim

        if init_network:
            self.model = CNNPolicy.create_network(**kwargs)
            self.forward = self._model_forward()
        else:
            # nothing is built; the caller supplies model and forward later
            self.model = None
            self.forward = None

    def _model_forward(self):
        """Build a function that runs a batch of inputs through self.model.

        The function is constructed with the current keras backend and, when
        given a batch of inputs, simply processes them forward and returns
        the output.

        The output has size (batch x 361) for 19x19 boards (i.e. the output
        is a batch of distributions over flattened boards.
        See AlphaGo.util#flatten_idx)

        This is as opposed to model.compile(), which takes a loss function
        and training method.

        c.f. https://github.com/fchollet/keras/issues/1426
        """
        forward_function = K.function([self.model.input], [self.model.output])

        # the forward_function returns a list of tensors
        # the first [0] gets the front tensor.
        return lambda inpt: forward_function([inpt])[0]

    def batch_eval_state(self, state_gen, batch=16):
        """Given a stream of states in state_gen, evaluate them in batches
        to make best use of GPU resources.

        Not implemented yet; always raises NotImplementedError.

        Returns: TBD (stream of results? that would break zip().
            streaming pairs of pre-zipped (state, result)?)
        """
        raise NotImplementedError()

    def eval_state(self, state, moves=None):
        """Given a GameState object, return a list of (action, probability)
        pairs according to the network outputs.

        If a list of moves is specified, only those moves are kept in the
        distribution. A falsy ``moves`` (None or an empty sequence) falls
        back to state.get_legal_moves().

        The probabilities are the network activations at the selected moves,
        rescaled by their sum. No guard is applied for a zero sum.
        """
        tensor = self.preprocessor.state_to_tensor(state)

        # run the tensor through the network
        network_output = self.forward(tensor)

        moves = moves or state.get_legal_moves()
        move_indices = [flatten_idx(m, state.size) for m in moves]

        # get network activations at legal move locations
        # note: may not be a proper distribution by ignoring illegal moves,
        # hence the renormalization below
        distribution = network_output[0][move_indices]
        distribution = distribution / distribution.sum()

        # materialize the pairs: on Python 3 a bare zip() is a one-shot
        # iterator, which would not be the list this method promises
        return list(zip(moves, distribution))

    @staticmethod
    def create_network(**kwargs):
        """Construct a convolutional neural network.

        Keyword Arguments:
        - input_dim:         depth of features to be processed by first layer
                             (no default; a KeyError is raised if missing)
        - board:             width of the go board to be processed
                             (default 19)
        - filters_per_layer: number of filters used on every layer
                             (default 128)
        - layers:            number of convolutional steps (default 12)
        - filter_width_K:    (where K is between 1 and <layers>) width of
                             filter on layer K (default 3 except 1st layer
                             which defaults to 5). Must be odd.

        Returns the assembled (uncompiled) Sequential model whose output is a
        softmax over the flattened board.
        """
        defaults = {
            "board": 19,
            "filters_per_layer": 128,
            "layers": 12,
            "filter_width_1": 5,
        }
        # copy defaults, but override with anything in kwargs
        params = dict(defaults)
        params.update(kwargs)

        # create the network:
        # a series of convolutions with border_mode='same', intended to keep
        # the output dimensions at board x board
        network = Sequential()

        # create first layer
        network.add(
            convolutional.Convolution2D(input_shape=(params["input_dim"],
                                                     params["board"],
                                                     params["board"]),
                                        nb_filter=params["filters_per_layer"],
                                        nb_row=params["filter_width_1"],
                                        nb_col=params["filter_width_1"],
                                        init='uniform',
                                        activation='relu',
                                        border_mode='same'))

        # create all other layers
        for i in range(2, params["layers"] + 1):
            # use filter_width_K if it is there, otherwise use 3
            filter_key = "filter_width_%d" % i
            filter_width = params.get(filter_key, 3)
            network.add(
                convolutional.Convolution2D(
                    nb_filter=params["filters_per_layer"],
                    nb_row=filter_width,
                    nb_col=filter_width,
                    init='uniform',
                    activation='relu',
                    border_mode='same'))

        # the last layer maps each <filters_per_layer> feature to a number
        network.add(
            convolutional.Convolution2D(nb_filter=1,
                                        nb_row=1,
                                        nb_col=1,
                                        init='uniform',
                                        border_mode='same'))
        # flatten the single-filter output into one vector per sample
        network.add(Flatten())
        # softmax makes it into a probability distribution
        network.add(Activation('softmax'))

        return network

    @staticmethod
    def load_model(json_file):
        """Create a new CNNPolicy object from the architecture specified in
        json_file.

        The file is expected to hold the two top-level entries written by
        save_model(): 'feature_list' and 'keras_model'.
        """
        with open(json_file, 'r') as f:
            object_specs = json.load(f)

        # skip building the default network: the model and its forward
        # function are replaced by the deserialized ones right below
        new_policy = CNNPolicy(object_specs['feature_list'],
                               init_network=False)
        new_policy.model = model_from_json(object_specs['keras_model'])
        new_policy.forward = new_policy._model_forward()
        return new_policy

    def save_model(self, json_file):
        """Write the network model and preprocessing features to the
        specified file.
        """
        # this looks odd because we are serializing a model with json as a string
        # then making that the value of an object which is then serialized as
        # json again.
        # It's not as crazy as it looks. A CNNPolicy has 2 moving parts - the
        # feature preprocessing and the neural net, each of which gets a top-level
        # entry in the saved file. Keras just happens to serialize models with JSON
        # as well. Note how this format makes load_model fairly clean as well.
        object_specs = {
            'keras_model': self.model.to_json(),
            'feature_list': self.preprocessor.feature_list
        }
        # use the json module to write object_specs to file
        with open(json_file, 'w') as f:
            json.dump(object_specs, f)