def test_is_empty_hole():
    """Check ``is_empty_hole`` on the default board and on a custom layout."""
    default_board = Mancala()
    assert alpha_beta_pruning.is_empty_hole(default_board, 0) == False

    custom_board = Mancala(
        7, 7, [1, 9, 8, 8, 0, 9, 9, 2, 1, 1, 10, 10, 10, 9, 9, 2])
    # In the list above the entry at index 4 is 0 and the entry at index 5 is 9.
    for hole, expected in ((4, True), (5, False)):
        assert alpha_beta_pruning.is_empty_hole(custom_board, hole) == expected
def test_is_terminal_node():
    """Check ``is_terminal_node`` on a default, a mid-game and a finished board."""
    mancala = Mancala()
    assert alpha_beta_pruning.is_terminal_node(mancala) == False

    # The original bound this board to a misspelled name (``ancala``), so the
    # assertion below silently re-tested the default board instead.
    mancala = Mancala(7, 7, [1, 9, 8, 8, 0, 9, 9, 2, 1, 1, 10, 10, 10, 9, 9, 2])
    assert alpha_beta_pruning.is_terminal_node(mancala) == False

    # First seven entries are all zero here; expected to count as terminal.
    mancala = Mancala(7, 7, [0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 10, 10, 10, 9, 9, 2])
    assert alpha_beta_pruning.is_terminal_node(mancala) == True
def test_play(self):
    """Exercise ``play`` end to end on ``self.m``.

    The scenarios run in sequence on the same game object, so each one
    depends on the board state left behind by the previous one (or on the
    board explicitly assigned at its start).
    """
    # This carries the bulk of the testing; remove_slam and update_actions are
    # not tested separately because they are heavily tied to play().
    # Test 1 - Valid Move
    self.m.play(0, 1)
    self.assertEqual([0, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4], self.m.board, "Test 1 - Valid Move")
    self.m.play(7, 2)
    self.assertEqual([0, 5, 5, 5, 5, 4, 4, 0, 5, 5, 5, 5], self.m.board, "Test 1 - Valid Move")
    # Start over with a fresh game and replay the opening move.
    self.m = Mancala()
    self.m.play(0, 1)
    # Test 2 - Invalid move (pit 0 was just played); board must stay unchanged
    self.invalid_play(self.m, 0, 1)
    self.assertEqual([0, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4], self.m.board, "Test 2 - Invalid move")
    # Test 3 - Wrap around move
    self.m.play(11, 2)
    self.assertEqual([1, 6, 6, 6, 5, 4, 4, 4, 4, 4, 4, 0], self.m.board, "Test 3 - Wrap around move")
    # Test 4 - Player 1 must feed
    # -- new board --
    self.m.board = [1, 6, 6, 6, 5, 4, 0, 0, 0, 0, 0, 0]
    self.m.update_actions(1)
    self.assertEqual([1, 2, 3, 4, 5], self.m.actions, "Test 4 - Player 1 must feed")
    # Test 5 - Invalid move; board must stay unchanged
    self.invalid_play(self.m, 9, 1)
    self.assertEqual([1, 6, 6, 6, 5, 4, 0, 0, 0, 0, 0, 0], self.m.board, "Test 5 - Invalid move")
    # -- new board --
    # Test 6 - Grand slam detection for p1
    self.m.board = [1, 0, 2, 0, 0, 6, 2, 1, 2, 2, 1, 2]
    self.m.update_actions(1)
    self.assertEqual([0, 2], self.m.get_actions(1), "Test 6 - Grand slam detection for p1")
    self.invalid_play(self.m, 5, 1)
    self.assertEqual([1, 0, 2, 0, 0, 6, 2, 1, 2, 2, 1, 2], self.m.board)
    # -- new board --
    # Test 7 - Grand slam detection for p2
    self.m.board = [2, 1, 2, 2, 1, 2, 1, 0, 2, 0, 0, 6]
    self.m.update_actions(2)
    self.assertEqual(self.m.get_actions(2), [6, 8], "Test 7 - Grand slam detection for p2")
    self.invalid_play(self.m, 11, 2)
    self.assertEqual(self.m.board, [2, 1, 2, 2, 1, 2, 1, 0, 2, 0, 0, 6])
    # Test 8 - Captures
    self.m.board = [4, 4, 4, 3, 2, 3, 2, 1, 2, 2, 1, 2]
    self.m.update_actions(1)
    self.m.play(5, 1)
    self.assertEqual(8, self.m.p1_store)
    # Extra scenario: after this move the game must not be flagged terminal.
    self.m.board = [4, 5, 2, 0, 3, 3, 3, 3, 2, 2, 0, 4]
    self.m.update_actions(1)
    self.m.play(5, 1)
    self.assertEqual(False, self.m.is_terminal)
    # Debug output of the final board.
    print(self.m.board)
def test_get_north_scores():
    """Check ``get_north_scores`` on the default board and two custom layouts."""
    assert alpha_beta_pruning.get_north_scores(Mancala()) == 0

    layouts = (
        ([1, 9, 8, 8, 0, 9, 9, 2, 1, 1, 10, 10, 10, 9, 9, 2], 2),
        ([1, 9, 8, 8, 0, 9, 9, 8, 1, 1, 10, 10, 10, 9, 9, 4], 8),
    )
    for holes, expected_score in layouts:
        board = Mancala(7, 7, holes)
        assert alpha_beta_pruning.get_north_scores(board) == expected_score
def test_get_heuristics():
    """Check ``get_heuristics`` on the default board and two custom layouts."""
    assert alpha_beta_pruning.get_heuristics(Mancala()) == 0

    layouts = (
        ([1, 9, 8, 8, 0, 9, 9, 2, 1, 1, 10, 10, 10, 9, 9, 2], 0),
        ([1, 9, 8, 8, 0, 9, 9, 8, 1, 1, 10, 10, 10, 9, 9, 4], -4),
    )
    for holes, expected_value in layouts:
        board = Mancala(7, 7, holes)
        assert alpha_beta_pruning.get_heuristics(board) == expected_value
def play_game():
    """Build an ``Agent`` from the saved-model path and play one game.

    The model file is expected at ``<cwd>/model/mancala_agent.pkl``; the
    ``model`` directory is created when it does not exist yet.

    :return: always ``0``
    """
    # Join path components with os.path.join: the previous hard-coded "\\"
    # separators only produced a real sub-directory on Windows.
    model_dir = os.path.join(os.getcwd(), "model")
    # exist_ok avoids failing if the directory appears between check and create.
    os.makedirs(model_dir, exist_ok=True)
    model_path = os.path.join(model_dir, "mancala_agent.pkl")

    loaded_agent = Agent(load_agent_path=model_path)
    environment = Mancala(loaded_agent)
    environment.play_game()
    return 0
def gens(self, node, depth, iteration):
    """Recursively hang a two-child subtree of fixed height under ``node``.

    Every call creates two child nodes for ``node``.  At ``depth == 0`` the
    children become leaves and receive hard-coded values selected by the
    running leaf-pair counter ``iteration`` (pairs 0-7 are defined; other
    counters leave ``value`` untouched).

    :param node: parent that receives the two new children in ``node.childs``
    :param depth: remaining levels below ``node``; 0 means the children are leaves
    :param iteration: leaf-pair counter; ``-1`` is treated as ``0``
    :return: counter to use for the next leaf pair
    """
    left = Node(Mancala())
    right = Node(Mancala())
    siblings = (left, right)

    mover = 2 if depth % 2 == 1 else 1
    for child in siblings:
        child.parent = node
        child.actions = [0, 1]
        child.player = mover
    node.childs = {0: left, 1: right}

    if depth != 0:
        # Interior level: expand the left subtree first, then thread its
        # resulting counter through the right subtree.
        after_left = self.gens(left, depth - 1, iteration)
        return self.gens(right, depth - 1, after_left)

    if iteration == -1:
        iteration = 0
    for child in siblings:
        child.leaf = True
        child.actions = []

    # (left value, right value) for each leaf pair, in generation order.
    leaf_values = {
        0: (4, 3),
        1: (1, 8),
        2: (0, 5),
        3: (8, 0),
        4: (5, 6),
        5: (0, 3),
        6: (7, 4),
        7: (7, 1),
    }
    pair = leaf_values.get(iteration)
    if pair is not None:
        left.value, right.value = pair
    return iteration + 1
def main():
    """Alternate an ``alphabeta`` call with a typed-in action until a leaf.

    Each round calls ``alphabeta`` on the node, prints ``manc`` and then
    applies the integer action read from standard input.
    """
    root = Node(Mancala())
    while True:
        if root.leaf:
            break
        alphabeta(root, -1000, 1000, 1, 5, True)
        print(root.manc)
        chosen = int(input("Enter action: "))
        root.play(chosen)
def generic():
    """Play ``NUM_GAMES`` games of alpha-beta against UCT and record stats.

    Odd turns call ``alphabeta`` with ``depth``; even turns call ``uct`` with
    ``depth2``.  The turn counter restarts at 1 for every game.
    """
    depth = 3
    depth2 = 5000
    # To sweep settings, loop depth over [1, 3, 5] and depth2 over [25, 50, 100].
    print("Format: Depth {0} Iterations {1}".format(depth, depth2))
    stats = Measure("REE")
    for _ in range(NUM_GAMES):
        print(".", end="")
        node = Node(Mancala())
        stats.start_stats()
        turn = 1
        while not node.leaf:
            first_agent_to_move = turn % 2 == 1
            if first_agent_to_move:
                # Alternatives tried here: random_agent(node), fsss(node, depth),
                # uct(node, 1, 25, 1).
                alphabeta(node, NEG_INF, POS_INF, depth, True)
            else:
                # Alternatives tried here: alphabeta(..., depth2, True),
                # uct(node, 1, depth2, 1).
                uct(node, 1, depth2, 0)
            turn += 1
        stats.update_stats(node)
    stats.end_stats()
def get_next_move(self, m: mc.Mancala) -> int:
    """Pick the valid move with the highest minimax score.

    The search runs on a scratch ``mc.Mancala`` that is reset to the
    captured board status before every candidate move; ``m`` itself is only
    read through ``get_board_status``.

    :param m: game whose board status is evaluated
    :return: the best move found, or ``-1`` when no move scores above -100
    """
    snapshot = m.get_board_status()
    scratch = mc.Mancala()
    scratch.set_board_status(self.p, snapshot)

    top_score, top_move = -100, -1
    for candidate in scratch.get_valid_moves():
        # Rewind to the captured position, then try this candidate.
        scratch.set_board_status(self.p, snapshot)
        scratch.play_turn(candidate)
        score = self.minimax(scratch, self._depth)
        if score > top_score:
            top_score, top_move = score, candidate
        print("top level: move %i, score %i" % (candidate, score))
    return top_move
def test_get_all_possible_moves():
    """Check ``get_all_possible_moves`` for both sides of one custom board."""
    board = Mancala(7, 7, [1, 9, 8, 8, 0, 9, 9, 2, 1, 1, 10, 10, 10, 9, 9, 2])
    expected_by_side = (
        ('south', [1, 2, 3, 4, 5, 6, 7]),
        ('north', [1, 2, 3, 4, 6, 7]),
    )
    for side, expected_moves in expected_by_side:
        assert alpha_beta_pruning.get_all_possible_moves(
            board, side) == expected_moves
def train_agent(n_games=1, games_per_checkpoint=1, model_save_path='model/mancala_agent.pkl'):
    """Run reinforcement-learning games and persist the agent along the way.

    :param n_games: number of games to play
    :param games_per_checkpoint: the agent is saved whenever the count of
        remaining games is a multiple of this value
    :param model_save_path: file the agent is loaded from and saved to
    :return: the ``Mancala`` environment used for training
    """
    # The agent is constructed from the save path (per the original note, an
    # existing model is extended, otherwise training starts fresh).
    env = Mancala(Agent(load_agent_path=model_save_path))

    remaining = n_games
    while remaining > 0:
        env.play_game(reinforcement_learning=True)
        at_checkpoint = remaining % games_per_checkpoint == 0
        if at_checkpoint:
            env.mancala_agent.save_agent(model_save_path)
            logging.info('Saved RL Agent Model!')
            print('Remaining Games: ', remaining)
        remaining -= 1

    # Always persist the final state, whether or not the last game hit a checkpoint.
    env.mancala_agent.save_agent(model_save_path)
    return env
def __init__(self):
    """Create the game model, load the ``.ui`` layout and bind click handlers.

    Each board widget's ``mousePressEvent`` is replaced with the matching
    ``clicked_*`` method; the ``start_game`` widget triggers ``start_new_game``.
    """
    self.__mancala = Mancala()
    super(GameUi, self).__init__()
    uic.loadUi(r'cw3_ui.ui', self)

    for side in ('a', 'b'):
        # "studnia" widgets first, then pits 0-5, same as the explicit listing.
        store_widget = getattr(self, side + '_studnia')
        store_widget.mousePressEvent = getattr(self, 'clicked_studnia_' + side)
        for pit in range(6):
            pit_widget = getattr(self, '{}_{}'.format(side, pit))
            pit_widget.mousePressEvent = getattr(
                self, 'clicked_{}{}'.format(side, pit))

    self.start_game.mousePressEvent = self.start_new_game
def copy_node(n):
    """
    Build a lightweight duplicate of a node; intended as a cheaper
    alternative to running deepcopy on every call.

    The board and action lists are shallow-copied; the stores, move counter
    and terminal flag are carried over as-is.

    :param n: Node to be copied
    :return: Copied node
    """
    source = n.manc
    clone = Node(Mancala(), n.player, n.depth, None, source.is_terminal)
    target = clone.manc
    target.board = copy(source.board)
    target.actions = copy(source.actions)
    for field in ("is_terminal", "p1_store", "p2_store", "num_moves"):
        setattr(target, field, getattr(source, field))
    return clone
def random_alphabeta():
    """Play ``NUM_GAMES`` games of the random agent against alpha-beta.

    Odd turns call ``random_agent`` (as player 1); even turns call
    ``alphabeta`` (as player 2).  Per-game results are printed and fed
    into a ``Measure`` collector.
    """
    stats = Measure("Random v. Alpha-Beta")
    for i in range(0, NUM_GAMES):
        # Restart the turn counter for every game.  It used to be initialised
        # once before the loop, so a game with an odd number of moves made the
        # next game open with the player-2 agent on a fresh board.
        player = 1
        n = Node(Mancala())
        stats.start_stats()
        while not n.leaf:
            if player % 2 == 1:
                random_agent(n, 1)
            else:
                alphabeta(n, -1000, 1000, 2, 3, True)
            player += 1
        stats.update_stats(n)
        print("Game {0} finished. Moves: {1}. Winner: Player {2}".format(
            i, n.manc.num_moves, 1 if n.manc.p1_store > n.manc.p2_store else 2))
    stats.end_stats()
def alphabeta_fsss():
    """Play ``NUM_GAMES`` games of alpha-beta against FSSS.

    Odd turns call ``alphabeta`` with ``depth1``; even turns call ``fsss``
    with ``depth2``.  Per-game results are printed and fed into a
    ``Measure`` collector.
    """
    depth1 = 7
    depth2 = 7
    stats = Measure("Alpha-Beta v. FSSS")
    for i in range(0, NUM_GAMES):
        # Restart the turn counter for every game.  It used to be initialised
        # once before the loop, so turn parity leaked from one game into the
        # next and could swap which agent opens.
        player = 1
        n = Node(Mancala())
        stats.start_stats()
        while not n.leaf:
            if player % 2 == 1:
                main_log.info("Alpha-Beta started")
                alphabeta(n, -1000, 1000, depth1, True)
            else:
                main_log.info("FSSS started")
                fsss(n, depth2)
            player += 1
        stats.update_stats(n)
        print("Game {0} finished. Moves: {1}. Winner: Player {2}".format(
            i, n.manc.num_moves, 1 if n.manc.p1_store > n.manc.p2_store else 2))
    stats.end_stats()
def alphabeta_uct():
    """Play ``NUM_GAMES`` games of UCT against alpha-beta.

    Odd turns call ``uct``; even turns call ``alphabeta`` (as player 2).
    A game that raises is logged and abandoned; the remaining games still run.
    """
    import logging

    stats = Measure("Alpha-Beta v. UCT")
    for i in range(0, NUM_GAMES):
        player = 1
        n = Node(Mancala())
        n.is_root = True
        stats.start_stats()
        try:
            while not n.leaf:
                if player % 2 == 1:
                    uct(n, 1)
                else:
                    # alphabeta(m_node, alpha, beta, player, depth)
                    alphabeta(n, -1000, 1000, 2, True)
                player += 1
            stats.update_stats(n)
        except Exception:
            # Previously the error was dropped without a trace; record it with
            # its traceback before closing the stats as before.
            logging.getLogger(__name__).exception(
                "Game %s aborted by an exception", i)
            stats.end_stats()
        print("Game {0} finished. Moves: {1}. Winner: Player {2}".format(
            i, n.manc.num_moves, 1 if n.manc.p1_store > n.manc.p2_store else 2))
    stats.end_stats()
def setUp(self):
    """Give every test two independent, freshly constructed games."""
    self.m, self.g = Mancala(), Mancala()
def start_new_game(self, event):
    """Replace the current game with a new one and refresh the display.

    :param event: the mouse event that triggered the handler (unused)
    """
    fresh_game = Mancala()
    self.__mancala = fresh_game
    self.__renew_game_state()
    self.info.setText("Game")
class GameUi(QtWidgets.QMainWindow):
    """Main window that forwards pit clicks to a ``Mancala`` model.

    Widgets come from ``cw3_ui.ui``: ``a_0``..``a_5`` / ``b_0``..``b_5`` show
    the pits, ``a_studnia`` / ``b_studnia`` (Polish: "well") show container 6
    of each player, and ``info`` shows the game status text.
    """

    def __init__(self):
        """Create the model, load the layout and bind the click handlers."""
        self.__mancala = Mancala()
        super(GameUi, self).__init__()
        uic.loadUi(r'cw3_ui.ui', self)
        self.a_studnia.mousePressEvent = self.clicked_studnia_a
        self.a_0.mousePressEvent = self.clicked_a0
        self.a_1.mousePressEvent = self.clicked_a1
        self.a_2.mousePressEvent = self.clicked_a2
        self.a_3.mousePressEvent = self.clicked_a3
        self.a_4.mousePressEvent = self.clicked_a4
        self.a_5.mousePressEvent = self.clicked_a5
        self.b_studnia.mousePressEvent = self.clicked_studnia_b
        self.b_0.mousePressEvent = self.clicked_b0
        self.b_1.mousePressEvent = self.clicked_b1
        self.b_2.mousePressEvent = self.clicked_b2
        self.b_3.mousePressEvent = self.clicked_b3
        self.b_4.mousePressEvent = self.clicked_b4
        self.b_5.mousePressEvent = self.clicked_b5
        self.start_game.mousePressEvent = self.start_new_game

    def start_new_game(self, event):
        """Replace the current game with a new one and refresh the display."""
        self.__mancala = Mancala()
        self.__renew_game_state()
        self.info.setText("Game")

    def __handle_pit_click(self, side, index):
        """Log the click, play pit ``index`` for player ``side``, then redraw."""
        print("clicked {}{}".format(side, index))
        self.__mancala.step_player(index, side)
        self.__renew_game_state()

    def clicked_studnia_a(self, event):
        """Log a click on player A's well; stepping from it is disabled."""
        print("clicked studnia a")

    def clicked_a0(self, event):
        """Handle a click on player A's pit 0."""
        self.__handle_pit_click('a', 0)

    def clicked_a1(self, event):
        """Handle a click on player A's pit 1."""
        self.__handle_pit_click('a', 1)

    def clicked_a2(self, event):
        """Handle a click on player A's pit 2."""
        self.__handle_pit_click('a', 2)

    def clicked_a3(self, event):
        """Handle a click on player A's pit 3."""
        self.__handle_pit_click('a', 3)

    def clicked_a4(self, event):
        """Handle a click on player A's pit 4."""
        self.__handle_pit_click('a', 4)

    def clicked_a5(self, event):
        """Handle a click on player A's pit 5."""
        self.__handle_pit_click('a', 5)

    def clicked_studnia_b(self, event):
        """Log a click on player B's well; stepping from it is disabled."""
        print("clicked studnia b")

    def clicked_b0(self, event):
        """Handle a click on player B's pit 0."""
        self.__handle_pit_click('b', 0)

    def clicked_b1(self, event):
        """Handle a click on player B's pit 1."""
        self.__handle_pit_click('b', 1)

    def clicked_b2(self, event):
        """Handle a click on player B's pit 2."""
        self.__handle_pit_click('b', 2)

    def clicked_b3(self, event):
        """Handle a click on player B's pit 3."""
        self.__handle_pit_click('b', 3)

    def clicked_b4(self, event):
        """Handle a click on player B's pit 4."""
        self.__handle_pit_click('b', 4)

    def clicked_b5(self, event):
        """Handle a click on player B's pit 5."""
        self.__handle_pit_click('b', 5)

    def __renew_game_state(self):
        """Copy all stone counts into the widgets and announce the result.

        When the model reports ``finish``, the ``info`` label names the
        player whose last container holds more stones, or a draw when both
        hold the same number.
        """
        players = (
            ('a', self.__mancala.player_a),
            ('b', self.__mancala.player_b),
        )
        for side, player in players:
            well = getattr(self, side + '_studnia')
            well.setText(str(player.containers[6].num_stones))
            for index in range(6):
                pit = getattr(self, '{}_{}'.format(side, index))
                pit.setText(str(player.containers[index].num_stones))

        if self.__mancala.finish:
            print("finish game")
            stones_a = self.__mancala.player_a.containers[-1].num_stones
            stones_b = self.__mancala.player_b.containers[-1].num_stones
            if stones_a == stones_b:
                # Equal stores used to be reported as a win for Player B.
                self.info.setText("Draw")
            else:
                winner = "Player A" if stones_a > stones_b else "Player B"
                self.info.setText("{} wins".format(winner))
if side == 'north' and msg_parser.is_our_turn(): message = "SWAP\n" side = "south" server.sendall(message.encode('utf-8')) first_move = False elif side == 'south' and msg_parser.is_start(): message = "MOVE;1\n" server.sendall(message.encode('utf-8')) first_move = False second_move = True elif msg_parser.is_our_turn(): if second_move: if msg_parser.is_swap(): side = "north" second_move = False _, game_board = msg_parser.get_board() mancala = Mancala(7, 7, game_board) move = AlphaPruningAgent(max_depth=2, process_depth=0, thread_depth=0).get_move(mancala, side) message = "MOVE;" + str(move) + "\n" #print(message) server.sendall(message.encode('utf-8')) except Exception as e: print("Exception:" + str(e)) finally: sock.close()
def setUp(self) -> None:
    """Create a node owned by player 1 plus a ``copy_node`` duplicate of it."""
    original = Node(Mancala())
    original.player = 1
    self.n = original
    self.m = Node.copy_node(original)
def setUp(self):
    """Give every test a fresh node wrapping a new game."""
    game = Mancala()
    self.n = Node(game)
class TestMancala(TestCase):
    """Unit tests for the ``Mancala`` game model."""

    def setUp(self):
        """Give every test two independent, freshly constructed games."""
        self.m, self.g = Mancala(), Mancala()

    def test_init(self):
        """A new game has empty stores, all twelve actions and four stones per pit."""
        game = self.m
        self.assertEqual(game.p1_store, 0)
        self.assertEqual(game.p2_store, 0)
        self.assertEqual(game.is_terminal, False)
        self.assertEqual(list(range(12)), game.actions)
        self.assertEqual([4] * 12, game.board)

    def invalid_play(self, board, move, player):
        """Verify that playing ``move`` for ``player`` raises ``ValueError``."""
        with self.assertRaises(ValueError):
            board.play(move, player)

    def test_play(self):
        """Exercise ``play`` end to end; scenarios build on one another."""
        # This carries the bulk of the testing; remove_slam and update_actions
        # are not tested separately because they are heavily tied to play().

        # Test 1 - Valid Move
        self.m.play(0, 1)
        self.assertEqual([0, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4], self.m.board, "Test 1 - Valid Move")
        self.m.play(7, 2)
        self.assertEqual([0, 5, 5, 5, 5, 4, 4, 0, 5, 5, 5, 5], self.m.board, "Test 1 - Valid Move")

        self.m = Mancala()
        self.m.play(0, 1)

        # Test 2 - Invalid move
        self.invalid_play(self.m, 0, 1)
        self.assertEqual([0, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4], self.m.board, "Test 2 - Invalid move")

        # Test 3 - Wrap around move
        self.m.play(11, 2)
        self.assertEqual([1, 6, 6, 6, 5, 4, 4, 4, 4, 4, 4, 0], self.m.board, "Test 3 - Wrap around move")

        # Test 4 - Player 1 must feed
        # -- new board --
        self.m.board = [1, 6, 6, 6, 5, 4, 0, 0, 0, 0, 0, 0]
        self.m.update_actions(1)
        self.assertEqual([1, 2, 3, 4, 5], self.m.actions, "Test 4 - Player 1 must feed")

        # Test 5 - Invalid move
        self.invalid_play(self.m, 9, 1)
        self.assertEqual([1, 6, 6, 6, 5, 4, 0, 0, 0, 0, 0, 0], self.m.board, "Test 5 - Invalid move")

        # -- new board --
        # Test 6 - Grand slam detection for p1
        self.m.board = [1, 0, 2, 0, 0, 6, 2, 1, 2, 2, 1, 2]
        self.m.update_actions(1)
        self.assertEqual([0, 2], self.m.get_actions(1), "Test 6 - Grand slam detection for p1")
        self.invalid_play(self.m, 5, 1)
        self.assertEqual([1, 0, 2, 0, 0, 6, 2, 1, 2, 2, 1, 2], self.m.board)

        # -- new board --
        # Test 7 - Grand slam detection for p2
        self.m.board = [2, 1, 2, 2, 1, 2, 1, 0, 2, 0, 0, 6]
        self.m.update_actions(2)
        self.assertEqual(self.m.get_actions(2), [6, 8], "Test 7 - Grand slam detection for p2")
        self.invalid_play(self.m, 11, 2)
        self.assertEqual(self.m.board, [2, 1, 2, 2, 1, 2, 1, 0, 2, 0, 0, 6])

        # Test 8 - Captures
        self.m.board = [4, 4, 4, 3, 2, 3, 2, 1, 2, 2, 1, 2]
        self.m.update_actions(1)
        self.m.play(5, 1)
        self.assertEqual(8, self.m.p1_store)

        # Extra scenario: the game must not be flagged terminal after this move.
        self.m.board = [4, 5, 2, 0, 3, 3, 3, 3, 2, 2, 0, 4]
        self.m.update_actions(1)
        self.m.play(5, 1)
        self.assertEqual(False, self.m.is_terminal)
        print(self.m.board)

    def test_reward(self):
        """Placeholder; no reward behaviour is asserted yet."""
        # TODO
        self.assertEqual(1, 1)

    def test_get_actions(self):
        """``get_actions`` lists each player's pits and drops a pit once played."""
        game = self.g
        self.assertEqual([0, 1, 2, 3, 4, 5], game.get_actions(1))
        self.assertEqual([6, 7, 8, 9, 10, 11], game.get_actions(2))
        game.play(0, 1)
        self.assertEqual([1, 2, 3, 4, 5], game.get_actions(1))
        game.play(8, 2)
        self.assertEqual([6, 7, 9, 10, 11], game.get_actions(2))

    def test__remove_slam(self):
        """Placeholder; ``_remove_slam`` is only covered through ``test_play``."""
        self.assertEqual(1, 1)

    def test_update_actions(self):
        """``update_actions`` recomputes the playable pits for a given board."""
        # valid move
        self.m.play(0, 1)
        self.assertEqual([1, 2, 3, 4, 5], self.m.get_actions(1))
        self.assertEqual([6, 7, 8, 9, 10, 11], self.m.get_actions(2))
        self.assertEqual([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], self.m.actions)

        self.m.board = [6, 1, 0, 0, 10, 3, 2, 2, 1, 0, 0, 0]
        self.m.update_actions(1)
        self.assertEqual([0, 1, 4, 6, 7, 8], self.m.actions)

        self.m.board = [0, 0, 0, 0, 4, 3, 2, 2, 1, 0, 0, 0]
        self.m.update_actions(1)
        # Order is not checked here: exactly these five pits must be present.
        for pit in [4, 5, 6, 7, 8]:
            self.assertIn(pit, self.m.actions)
        self.assertEqual(5, len(self.m.actions))
if byte_stream == None: break else: server_text = byte_stream.decode('utf-8').rstrip() msg_parser = Parser(server_text) if msg_parser.is_start(): side = msg_parser.get_side() first_move = True if msg_parser.is_end(): break if has_win and msg_parser.is_our_turn(): _, game_board = msg_parser.get_board() mancala = Mancala(7, 7, game_board) valid_moves = mancala.get_valid_moves(side) move = random.choice(valid_moves) #print("has win") message = "MOVE;" + str(move) + "\n" server.sendall(message.encode('utf-8')) continue if first_move: if side == 'north' and msg_parser.is_our_turn(): #message = "SWAP\n" #message = "MOVE;1\n" _, game_board = msg_parser.get_board() mancala = Mancala(7, 7, game_board) move = AlphaPruningAgent(max_depth=7, process_depth=0,
def minimax(self, m: mc.Mancala, depth, turns_list=None):
    """Score the position in ``m`` with a plain depth-limited minimax.

    The game object is played on in place and put back to its entry state
    after every explored move.

    :param m: game state to evaluate
    :param depth: remaining plies to search; 0 evaluates the position directly
    :param turns_list: not used by this implementation
    :return: at a leaf, this agent's score minus the score of player
        ``1 - self.p``; otherwise the best child value for the side to move
    """
    is_maximizing = m.current_player() == self.p

    if depth == 0 or m.is_game_over():
        own_score = m.get_score(self.p)
        return own_score - m.get_score(1 - self.p)

    saved_state = m.get_board_status()
    mover = m.current_player()

    # The maximiser starts from a low sentinel, the minimiser from a high one.
    choose = max if is_maximizing else min
    best = -100 if is_maximizing else 100
    for move in m.get_valid_moves():
        m.play_turn(move)
        child_value = self.minimax(m, depth - 1)
        best = choose(best, child_value)
        # Undo the move so the next candidate starts from the same position.
        m.set_board_status(mover, saved_state)
    return best
def setUp(self) -> None:
    """Give every test a fresh node wrapping a new game in ``self.state``."""
    game = Mancala()
    self.state = Node(game)