def _random_legal_move(state_manager, board_size):
    """Return a uniformly random legal move for the current position.

    Rejection-samples cell indices until one passes StateManager.is_legal,
    then converts the index to a move object.
    """
    move_index = random.randrange(board_size ** 2)
    while not StateManager.is_legal(move_index, state_manager.string_representation()):
        move_index = random.randrange(board_size ** 2)
    return state_manager.convert_to_move(move_index)


def main():
    """Optionally train a neural actor via MCTS self-play, then run a tournament.

    With ``train = False`` (the default) a stored model is loaded and a
    1000-game match is played: player 1 chooses uniformly random legal
    moves (baseline) while player 2 follows the network's policy.
    Win counts are printed at the end.
    """
    board_size = 4
    max_num_moves = board_size ** 2  # one candidate move per board cell
    conv_layers = []                 # no convolutional layers in this config
    # NOTE(review): the original computed board_size**2 + 1 here and then
    # immediately overwrote it with 128; 128 is the value actually used.
    state_space_size = 128
    hidden_layers = [state_space_size, max_num_moves]
    learning_rate = 0.01
    state_manager = StateManager(board_size)
    num_simulations = 200

    player1 = NeuralActor(conv_layers, hidden_layers, max_num_moves, learning_rate, 'sgd')
    player2 = NeuralActor(conv_layers, hidden_layers, max_num_moves, learning_rate, 'sgd')
    mct1 = MCT(player1, num_simulations)

    train = False
    if train:
        # Self-play training: each game's search statistics become
        # training targets for the policy network.
        for episode in range(100):
            mct1.play_game(copy.deepcopy(state_manager))
            training_data = mct1.get_training_data()
            loss = player1.update_Q(training_data)
            print(f"{episode} {loss}")
        player1.store_model('data/16.3')
    else:
        player1.load_model('data/16.3')
        player2.load_model('data/16.3')

    win1 = 0
    win2 = 0
    for _ in range(1000):
        state_manager = StateManager(board_size)
        while not state_manager.player1_won() and not state_manager.player2_won():
            if not state_manager.player1_to_move:
                # Player 2's turn: follow the trained network's policy.
                # (The original also drew a discarded random move here.)
                move = state_manager.convert_to_move(
                    player2.get_action(state_manager.string_representation()))
            else:
                # Player 1's turn: uniformly random legal move as a baseline.
                move = _random_legal_move(state_manager, board_size)
            state_manager.make_move(move)
        if state_manager.player1_won():
            win1 += 1
        elif state_manager.player2_won():
            win2 += 1
        else:
            print("No winner")
    print(f"Times player 1 won: {win1}. Times player2 won: {win2}")
def expand(self):
    """Generate this node's children, one per legal move from its state.

    Rebuilds a StateManager from the stored string state, and for every
    legal move records a ``(child Node, 0)`` pair in ``self.children``
    (the 0 is the child's initial visit count). Each move is undone
    right after the child state is captured, so the manager is reusable.
    """
    # Board side length: the state string holds one char per cell plus one
    # leading player-to-move char, hence the -1 before the square root.
    board_size = int((len(self.state) - 1) ** 0.5)
    manager = StateManager(board_size, self.state)
    self.children = []
    for candidate in manager.get_moves():
        manager.make_move(candidate)
        child = Node(self, manager.string_representation())
        self.children.append((child, 0))  # visited 0 times so far
        manager.undo_move(candidate)
def rollout(self, leaf):
    """Play out a game from ``leaf``'s state and return the terminal reward.

    The first move is drawn uniformly at random (exploration); every
    subsequent move follows the actor network's policy.

    Args:
        leaf: node whose ``state`` string encodes the starting position.

    Returns:
        1.0 if player 1 wins the playout, -1.0 if player 2 wins.

    Raises:
        SystemExit: if the game finishes with no winner — an invariant
            violation for this game, so the process aborts.
    """
    # Board side length: state string is one player char + size**2 cells.
    size = int((len(leaf.state) - 1) ** 0.5)
    leaf_state = StateManager(size, leaf.state)
    first_iteration = True
    while True:
        if leaf_state.player1_won():
            return 1.0
        if leaf_state.player2_won():
            return -1.0
        if leaf_state.is_finished():
            # Original did print + quit(), which exits with status 0 and
            # hides the failure; SystemExit with a message exits nonzero.
            raise SystemExit("No winner error")
        if first_iteration:
            move = random.choice(leaf_state.get_moves())
            first_iteration = False
        else:
            move = leaf_state.convert_to_move(
                self.nn.get_action(leaf_state.string_representation()))
        leaf_state.make_move(move)