class ConnectFourBotNN(gym.Env):
    """Connect Four bot that chooses its moves with a ``Linear`` model.

    The bot keeps its own ``ConnectFourEnv`` instance in ``self.board``;
    callers keep it in sync by passing every played move to ``inform``.
    """

    def __init__(self, w, b):
        """Create a fresh board and build the model from ``w`` and ``b``.

        Both arguments are handed to ``Linear`` unchanged.
        """
        self.board = ConnectFourEnv()
        self.NN = Linear(w, b)

    def nextMove(self):
        """Return the best-scored column that is currently available.

        The flattened board is converted to a float32 tensor, wrapped in a
        one-element list and passed to the model; the model output is then
        ranked with ``argsort``.

        Returns:
            The highest-ranked entry of the ranking that also appears in
            ``self.board.available_moves()``, or ``None`` when no entry
            matches (for example when no move is available).
        """
        cells = self.board.board.flatten()
        features = tf.constant(cells, dtype='float32')
        scores = self.NN([features])
        # Materialise once: the moves are scanned again for every candidate.
        playable = list(self.board.available_moves())
        # Assumes the model returns one row of scores per input row and that
        # ``argsort`` ranks ascending (the NumPy/TensorFlow default), so the
        # best-scored column sits at the END of each row.
        for ranking in argsort(scores):
            # The reversal has to be applied per row. Reversing the outer
            # (batch) axis instead leaves a one-row result in ascending
            # order, which makes the bot prefer its worst-scored column.
            for column in reversed(ranking):
                if any(column == move for move in playable):
                    return column
        return None

    def inform(self, action):
        """Apply ``action`` to the bot's own board via ``step``."""
        self.board.step(action)

    def reset(self):
        """Replace the bot's board with a new ``ConnectFourEnv``."""
        self.board = ConnectFourEnv()
class ConnectFourBotRandom(gym.Env):
    """Connect Four bot that plays a randomly chosen available column.

    The bot tracks the game on its own ``ConnectFourEnv`` instance, which is
    advanced through ``inform`` and replaced by ``reset``.
    """

    def __init__(self):
        """Start with a freshly constructed board."""
        self.board = ConnectFourEnv()

    def reset(self):
        """Throw away the tracked board and start from a new one."""
        self.board = ConnectFourEnv()

    def inform(self, action):
        """Replay ``action`` on the tracked board via ``step``."""
        self.board.step(action)

    def nextMove(self):
        """Return one of the currently available moves, picked at random.

        The available moves are copied into a list, shuffled in place, and
        the first element is returned. Indexing an empty list raises
        ``IndexError``, so this must not be called when no move is left.
        """
        candidates = [*self.board.available_moves()]
        shuffle(candidates)
        return candidates[0]
# Bias for the network bot: seven float32 entries, initialised to zero.
b_init = tf.zeros_initializer()
b = tf.Variable(
    initial_value=b_init(shape=(7, ), dtype='float32'),
    trainable=True,
)

# NOTE(review): `w` is not defined in this part of the file; presumably it is
# created earlier, next to `b` -- confirm.
bot2 = ConnectFourBotRandom()
bot1 = ConnectFourBotNN(w, b)

# Run sizes and bookkeeping. The counters and arrays are only initialised
# here; nothing in this section updates them.
numberiter = 2
number_ga_configs = 2
results_log = np.array([])
gameswon = 0
gamesundecided = 0
maximum = np.array([])

# The two loops use distinct throwaway names so the inner loop no longer
# rebinds the outer loop's variable.
for _config in range(number_ga_configs):
    for _game in range(numberiter):
        # Every game starts from a fresh referee board and fresh bot boards.
        board = ConnectFourEnv()
        bot1.reset()
        bot2.reset()
        current_player = 1  # 1 -> bot1 moves, -1 -> bot2 moves

        while not board.is_win_state():
            mover = bot1 if current_player == 1 else bot2
            move = mover.nextMove()
            # NOTE(review): the turn passes to the other bot even when the
            # move is rejected below -- confirm this is intended.
            current_player *= -1

            if board.is_valid_action(move):
                board.step(move)
                # Both bots mirror the move on their own boards.
                bot1.inform(move)
                bot2.inform(move)

            # No move left: stop the game without a winner.
            if len(board.available_moves()) == 0:
                break
def reset(self):
    """Discard the current board and start over with a new ``ConnectFourEnv``."""
    fresh_board = ConnectFourEnv()
    self.board = fresh_board
def __init__(self, w, b):
    """Set up a new board and a ``Linear`` model built from ``w`` and ``b``.

    Both arguments are forwarded to ``Linear`` unchanged.
    """
    fresh_board = ConnectFourEnv()
    model = Linear(w, b)
    self.board = fresh_board
    self.NN = model
def __init__(self):
    """Give the instance a newly constructed ``ConnectFourEnv`` board."""
    fresh_board = ConnectFourEnv()
    self.board = fresh_board
def test_available_moves():
    """Check ``available_moves`` against each fixture board.

    Each case resets one shared environment to a fixture layout and compares
    the reported moves, as a set, with the expected columns.
    """
    env = ConnectFourEnv()
    cases = (
        (BOARD_AVAILABLE_0123, {0, 1, 2, 3}),
        (BOARD_AVAILABLE_2, {2}),
        (BOARD_AVAILABLE_6, {6}),
        (BOARD_AVAILABLE_NONE, set()),
    )
    for layout, expected_columns in cases:
        env.reset(layout)
        assert set(env.available_moves()) == expected_columns
def test_is_win_state():
    """Check that ``is_win_state`` is truthy for every winning fixture.

    The fixtures are tried in order (row, column, diagonal, back-diagonal)
    on a single environment that is reset to each layout in turn.
    """
    env = ConnectFourEnv()
    winning_layouts = (
        BOARD_WIN_ROW,
        BOARD_WIN_COLUMN,
        BOARD_WIN_DIAGONAL,
        BOARD_WIN_BDIAGONAL,
    )
    for layout in winning_layouts:
        env.reset(layout)
        assert env.is_win_state()
def test_is_valid_action():
    """Check ``is_valid_action`` on the validation fixture.

    On ``BOARD_VALIDATION`` action 0 must be accepted and action 3 rejected.
    """
    game = ConnectFourEnv()
    game.reset(BOARD_VALIDATION)

    accepted_action, rejected_action = 0, 3
    assert game.is_valid_action(accepted_action)
    assert not game.is_valid_action(rejected_action)