def terminator_min_max(self, hex_board, depth, max_or_min):
        """Returns the best position according to the min-max algorithm.
        Args:
            hex_board (HexBoard): The current hex board.
            depth (int): Search depth of the min-max algorithm.
            max_or_min (str): whether to maximise or minimise for the current color to play (hex_board.blue_to_move); either 'min' or 'max'.
        Returns:
            (int, int): AI player move.
        """
        alpha = float('-inf')  # initial alpha beta bounds
        beta = float('inf')
        TT_offset = 0
        if self.do_transposition:
            TT_offset = self.max_depth - depth  # level offset in the transposition table; needed when iterative deepening searches shallower than self.max_depth
            moves = order_moves_TT(hex_board, max_or_min, self.transposition_table[depth + TT_offset])
        else:
            moves = hex_board.get_free_positions()

        is_game_over = False
        if hex_board.check_win(hex_board.BLUE) or hex_board.check_win(hex_board.RED):
            is_game_over = True

        if depth == 0 or is_game_over or len(moves) == 0:  # leaf node or game over
            print("@TerminatorHex.terminator_min_max: no valid moves left, or out of depth")
            return None  # shouldn't happen
        elif max_or_min == 'max':  # maximise
            value = float('-inf')
            best_move = moves[0]  # prevent None return
            for move in moves:
                deepened_board = HexBoard(hex_board.board_size, n_players=2, enable_gui=False, interactive_text=False, ai_move=None, blue_ai_move=None, red_ai_move=None, move_list=hex_board.move_list)
                deepened_board.set_position_auto(move)
                #print(deepened_board.move_list)
                #print(hex_board.move_list)
                # new_value = minimax(deepened_board, depth - 1, 'min', self.heuristic_evaluator) # use for minimax
                new_value, alpha, beta = alpha_beta(deepened_board, depth - 1, 'min', alpha, beta,
                                                    self.heuristic_evaluator, depth_weighting=self.depth_weighting,
                                                    transposition_table=self.transposition_table, TT_offset=TT_offset)
                if (new_value > value):
                    value = new_value
                    best_move = move
                    if self.do_transposition:
                        self.transposition_table[depth + TT_offset][board_as_hash_key(hex_board)] = value
            return best_move
        elif max_or_min == 'min':  # minimise
            value = float('inf')
            best_move = moves[0]
            for move in moves:
                deepened_board = HexBoard(hex_board.board_size, n_players=2, enable_gui=False, interactive_text=False, ai_move=None, blue_ai_move=None, red_ai_move=None, move_list=hex_board.move_list)
                deepened_board.set_position_auto(move)
                # new_value = minimax(deepened_board, depth - 1, 'max', self.heuristic_evaluator)
                new_value, alpha, beta = alpha_beta(deepened_board, depth - 1, 'max', alpha, beta,
                                                    self.heuristic_evaluator, depth_weighting=self.depth_weighting,
                                                    transposition_table=self.transposition_table, TT_offset=TT_offset)
                if (new_value < value):
                    value = new_value
                    best_move = move
                    if self.do_transposition:
                        self.transposition_table[depth + TT_offset][board_as_hash_key(hex_board)] = value
            return best_move
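The transposition-table accesses above index first by search level (depth + TT_offset) and then by a hashable board key produced by board_as_hash_key. Both the table construction and the key function live elsewhere in this project; the sketch below only illustrates one possible shape for them, with a hypothetical key that does not canonicalise transposed move orders:

def make_transposition_table(max_depth):
    # One dict per search level; lookups elsewhere in this listing use
    # transposition_table[depth + TT_offset], which ranges from 0 to max_depth.
    return [dict() for _ in range(max_depth + 1)]

def sketch_board_key(hex_board):
    # Hypothetical stand-in for board_as_hash_key: the move list as an immutable
    # tuple. A real key would map transposed move orders to the same entry.
    return tuple(hex_board.move_list)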
Example 2
    def __init__(self, m, n, mode):

        # init all events
        EventManager.initEvents()

        EventManager.subscribe("GameFinished", self.onGameFinished)
        EventManager.subscribe("GameStarted", self.onGameStarted)

        # save size and mode
        self.size = [m, n]
        self.mode = mode

        # just to init the value
        self._currentPlayer = 1

        # instantiate model and view
        self.HexBoard = HexBoard(self.size[0], self.size[1])
        self.HexBoard.setReferenceToGame(self)

        if self.UIRequired():
            self.HexGUI = HexGUI(self.size[0], self.size[1], self)
        else:
            self.MachineGUI = MachineGUI(self.size[0], self.size[1], self)

        # set the game to entry point
        self.start(self._currentPlayer)

        if self.UIRequired():
            # main loop starts for event receiving
            self.HexGUI.mainloop()

        if not self.UIRequired():
            self.MachineGUI.gameLoop()
def order_moves_TT(hex_board, max_or_min, transposition_table, return_key_values=False):
    """Used with the transposition table algorithm. If we have a transposition table bound to self,
            return a move list for the current board sorted by heuristic in the TT.
            Relies on hex_board.blue_to_move for color determination.
        Args:
            hex_board (HexBoard): Hex board to evaluate
            max_or_min ('min' or 'max'): Sort by minimum eval values or maximum respectively.
            transposition_table: a dict-form transposition table
            return_key_values (bool): return (move, value) pairs instead of list of moves.
        Returns:
            list: Sorted list of moves
    """
    moves = [[move, 0] for move in hex_board.get_free_positions()]  # [move, score]
    for m in range(len(moves)):
        move = moves[m][0]
        deepened_board = HexBoard(hex_board.board_size, n_players=2, enable_gui=False, interactive_text=False, ai_move=None, blue_ai_move=None, red_ai_move=None, move_list=hex_board.move_list)
        deepened_board.set_position_auto(move)
        try:
            moves[m][1] = transposition_table[board_as_hash_key(deepened_board)]
        except KeyError:
            # position not yet in the table, which is expected for unexplored boards;
            # give it the worst possible score so the known moves are searched first
            moves[m][1] = float('-inf') if max_or_min == 'max' else float('inf')
    sort_order = (max_or_min == 'max')  # descending for the maximiser, ascending for the minimiser
    moves.sort(key=lambda val: val[1], reverse=sort_order)
    if return_key_values:
        return moves
    else:
        return [val[0] for val in moves]  # handle from 0 to len as proper ordering
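order_moves_TT expects the single dict for the level being searched (callers above pass transposition_table[depth + TT_offset]), not the whole per-level list. A hypothetical usage on an empty table, mirroring the HexBoard construction pattern used in the search code above:

from HexBoard import HexBoard

# With an empty level dict every lookup misses, so the result is simply the
# free-position order; a populated table would put the best-scored moves first.
board = HexBoard(4, n_players=2, enable_gui=False, interactive_text=False, ai_move=None,
                 blue_ai_move=None, red_ai_move=None, move_list=[])
print(order_moves_TT(board, 'max', {}))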
Example 4
    def start(self, firstPlayer):

        self.GameState = 0

        EventManager.notify("GameStarting")

        # move counter init
        self.moveCounter = 0

        # generate fresh state
        self.HexBoard = HexBoard(self.size[0], self.size[1])
        self.HexBoard.setReferenceToGame(self)

        # current player depending on decision
        self._currentPlayer = firstPlayer

        if self.mode == "ki":
            self.KI = MonteCarloTreeSearch(self, 2)  # TODO: ki_player_id == 2?
            # self.KI = HexKI(self.size[0], self.size[1])

        if self.mode == "inter" or self.mode == "machine":

            self.KI = []
            self.KI.append(HexKI(self.size[0], self.size[1]))
            self.KI.append(HexKI(self.size[0], self.size[1]))

            self._currentPlayerType = "ki"

        # firstPlayer == 0 means the first player should be chosen at random
        if firstPlayer == 0:
            self.chooseFirst()

        EventManager.notify("GameStarted")
Example 5
 def start(self):
     
     self.KI = []
     self.KI.append(HexKI(self.size[0], self.size[1]))
     self.KI.append(HexKI(self.size[0], self.size[1]))
     
     self.Game.HexBoard = HexBoard(self.size[0], self.size[1])
     self.Game.HexBoard.setReferenceToGame(self.Game)
Example 6
    def gameLoop(self):
        
        print("Entering Game Loop")
        player = 1
        Q = []
        while self.IterationCounter < self.targetIterations:
            
            q = 0
            while not self._finished:

                q += 1
                if player == 0:
                    player = 1
                else:
                    player = 0
                
                move = self.KI[player].nextMove()
                
                self.KI[0].receiveMove(move)
                self.KI[1].receiveMove(move)
                self.Game.makeMove(move)
                
                #print(self.KI[player].PatternMatcher.mapGameState())
            
            if q < 50:
                Q.append(q)
            self.IterationCounter = self.IterationCounter + 1
            
            
            
            for key, value in self.Game.HexBoard.Vertices.items():
                if value.player == self.Game.HexBoard.winner():
                    self.WonVertices.append(str(value.i) + ";" + str(value.j))
            
            if self.IterationCounter // (self.targetIterations / 20) != self.q:  # progress update every 5% of the target iterations
                self.q = self.IterationCounter // (self.targetIterations / 20)
                print(round(self.IterationCounter/self.targetIterations * 100,1), "%", self.IterationCounter)
            
            self.Game.HexBoard = HexBoard(self.size[0], self.size[1])
            self.Game.HexBoard.setReferenceToGame(self.Game)
            
            self._finished = False
            self.start()
            
        print("FINISHED")
        
        with open('output.txt', 'w') as f:  # assumed 'w' mode; the original 'r+' handle required an existing file and was never closed
            f.write("Move Count: Average:" + str(round(sum(Q) / len(Q))) +
                    ", Min:" + str(min(Q)) + ", Max:" + str(max(Q)) + " " + str(Q))
        
        
        print(collections.Counter(self.WonVertices))
def minimax(hex_board, depth, max_or_min, evaluator):
    """The minimax algorithm on the HexBoard
        Args:
            hex_board (HexBoard): The current hex board.
            depth (int): maximum depth to search
            max_or_min (str): whether to maximise or minimise for the current color to play (hex_board.blue_to_move); either 'min' or 'max'.
            evaluator (function): evaluator function. Called with args hex_board, maximiser_color
        Returns:
            int: maximised/minimised value according to the evaluator
        """
    moves = hex_board.get_free_positions()
    maximiser_color = [hex_board.BLUE, hex_board.RED][(max_or_min == 'max') ^ (hex_board.blue_to_move)]  # the maximiser is the color to move when max_or_min == 'max', otherwise its opponent
    is_game_over = False
    if hex_board.check_win(hex_board.BLUE) or hex_board.check_win(hex_board.RED):
        is_game_over = True

    # minimax:
    if depth <= 0 or is_game_over or len(moves) == 0:  # end state
        return (evaluator(hex_board, maximiser_color))
    elif max_or_min == 'max':  # maximise
        value = float('-inf')
        for move in moves:
            deepened_board = HexBoard(hex_board.board_size, n_players=2, enable_gui=False, interactive_text=False, ai_move=None, blue_ai_move=None, red_ai_move=None, move_list=hex_board.move_list)
            deepened_board.set_position_auto(move)
            new_value = minimax(deepened_board, depth - 1, 'min', evaluator)
            value = max(value, new_value)
        return value
    elif (max_or_min == 'min'):  # minimise
        value = float('inf')
        for move in moves:
            deepened_board = HexBoard(hex_board.board_size, n_players=2, enable_gui=False, interactive_text=False, ai_move=None, blue_ai_move=None, red_ai_move=None, move_list=hex_board.move_list)
            deepened_board.set_position_auto(move)
            new_value = minimax(deepened_board, depth - 1, 'max', evaluator)
            value = min(value, new_value)
        return value

    print("@minimax: unknown max_or_min objective", max_or_min)
    return None
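minimax (and alpha_beta below) call evaluator(hex_board, maximiser_color) at every leaf, so any function with that signature can be plugged in. The toy evaluator below is only an illustration (in the spirit of the TerminatorHex.random_score_heuristic used later in this listing), not one of the project's real heuristics:

import random

def sketch_evaluator(hex_board, maximiser_color):
    # Decisive scores for finished games, otherwise a small random value.
    minimiser_color = hex_board.RED if maximiser_color == hex_board.BLUE else hex_board.BLUE
    if hex_board.check_win(maximiser_color):
        return 1000
    if hex_board.check_win(minimiser_color):
        return -1000
    return random.uniform(-1, 1)

# value = minimax(current_board, 3, 'max', sketch_evaluator)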
def play_1v1(player1_move, player1_rating, player2_move, player2_rating,
             cur_round):
    """Plays two AI algorithms against each other and updates their ratings.
    Args:
        player1_move (function): Move generator for player 1
        player1_rating (ts.Rating): Current rating for player 1
        player2_move (function): Move generator for player 2
        player2_rating (ts.Rating): Current rating for player 2
        cur_round (int): Current iteration number. Used to determine player colors.
    """
    board_size = 5

    # Select color
    if cur_round % 2 == 0:
        player1_color = HexBoard.BLUE
        player2_color = HexBoard.RED
        blue_ai_move = player1_move
        red_ai_move = player2_move
    else:
        player1_color = HexBoard.RED
        player2_color = HexBoard.BLUE
        blue_ai_move = player2_move
        red_ai_move = player1_move

    board = HexBoard(board_size,
                     n_players=0,
                     enable_gui=False,
                     interactive_text=False,
                     ai_color=None,
                     ai_move=None,
                     blue_ai_move=blue_ai_move,
                     red_ai_move=red_ai_move,
                     move_list=[])
    winning_color = board.get_winning_color()

    # Update ratings
    if winning_color == player1_color:
        new_player1_rating, new_player2_rating = ts.rate_1vs1(
            player1_rating, player2_rating)
    elif winning_color == player2_color:
        new_player2_rating, new_player1_rating = ts.rate_1vs1(
            player2_rating, player1_rating)
    else:
        new_player1_rating = None
        new_player2_rating = None
        print("Rating error")

    return new_player1_rating, new_player2_rating
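A minimal sketch of how play_1v1 could be driven over a series of rounds, assuming trueskill is imported as ts (as the calls above imply) and that player1_move and player2_move are any two move generators from this project:

import trueskill as ts

def run_tournament(player1_move, player2_move, n_rounds=10):
    # Ratings start at the TrueSkill default and are threaded through play_1v1;
    # cur_round alternates which algorithm plays blue and which plays red.
    rating1, rating2 = ts.Rating(), ts.Rating()
    for cur_round in range(n_rounds):
        rating1, rating2 = play_1v1(player1_move, rating1, player2_move, rating2, cur_round)
    # Conservative skill estimates, mirroring the mu - 3 * sigma used in get_elo_and_time below.
    return rating1.mu - 3 * rating1.sigma, rating2.mu - 3 * rating2.sigma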
Example 9
    def start(self, firstPlayer):

        EventManager.notify("GameStarting")

        # move counter init
        self.moveCounter = 0

        # generate fresh state
        self.HexBoard = HexBoard(self.size[0], self.size[1])
        self.HexBoard.setReferenceToGame(self)

        # current player depending on decision
        self._currentPlayer = firstPlayer

        # firstPlayer == 0 means the first player should be chosen at random
        if firstPlayer == 0:
            self.chooseFirst()

        EventManager.notify("GameStarted")
Example 10
import math

import MCTSHex
from HexBoard import HexBoard


if __name__ == '__main__':
    enable_GUI = True
    enable_interactive_text = True
    board_size = 6
    n_players = 1
    ai_color = HexBoard.RED
    MCTS_AI_1 = MCTSHex.MCTSHex(500, 10, expansion_function=('constant', 1), enh_WinScan=True, enh_EnsureTopLevelExplr=True)
    MCTS_AI_2 = MCTSHex.MCTSHex(500, 10, expansion_function=('constant', 0.5), enh_WinScan=True, enh_FreqVisitor=True)
    board = HexBoard(board_size, n_players=n_players, enable_gui=enable_GUI, interactive_text=enable_interactive_text,
                     ai_move=MCTS_AI_1.MCTS_move, ai_color=ai_color,
                     blue_ai_move=MCTS_AI_1.MCTS_move, red_ai_move=MCTS_AI_2.MCTS_move,
                     move_list=[])

    if not enable_GUI and not enable_interactive_text:

        # sanity check that wins are detected
        for i in range(0, 2):
            winner = HexBoard.RED if i == 0 else HexBoard.BLUE
            loser = HexBoard.BLUE if i == 0 else HexBoard.RED
            board = HexBoard(3)
            board.place_with_color((1, 1), loser)
            board.place_with_color((2, 1), loser)
            board.place_with_color((1, 2), loser)
            board.place_with_color((2, 2), loser)
            board.place_with_color((0, 0), winner)
            board.place_with_color((1, 0), winner)
Example 11
    def __init__(self, m, n, mode):

        self._pause = False

        # save size and mode
        self.size = [m, n]
        self.mode = mode

        # just to init the value
        self._currentPlayer = 1
        self._currentPlayerType = "human"

        if mode == "copy":
            return

        # init all events
        EventManager.initEvents()

        EventManager.subscribe("GameFinished", self.onGameFinished)
        EventManager.subscribe("GameStarted", self.onGameStarted)
        EventManager.subscribe("MoveFinished", self.onMoveFinished)
        EventManager.subscribe("GameUILoaded", self.onGameUILoaded)
        EventManager.subscribe("ToggleVictoryPath", self.onToggleVictoryPath)

        # instantiate model and view
        self.HexBoard = HexBoard(self.size[0], self.size[1])
        self.HexBoard.setReferenceToGame(self)

        self.GameState = 0

        self.moveCounter = 0

        if self.UIRequired():
            self.HexGUI = HexGUI(self.size[0], self.size[1], self)
        else:
            self.MachineGUI = MachineGUI(self.size[0], self.size[1], self)

        if self.mode == "ki":
            self.KI = HexKI(self.size[0], self.size[1])

        if self.mode == "inter" or self.mode == "machine":

            self.KI = []
            self.KI.append(HexKI(self.size[0], self.size[1]))
            self.KI.append(HexKI(self.size[0], self.size[1]))

            self._currentPlayerType = "ki"

        # set the game to entry point
        #self.start(self._currentPlayer)

        if self.UIRequired():

            EventManager.subscribe("UITick", self.onUITick)

            # main loop starts for event receiving
            self.HexGUI.mainloop()

        if not self.UIRequired():
            self.GameState = 1
            self.MachineGUI.gameLoop()
def get_elo_and_time(N, C_p, max_time=0, debug=False):
    """Plays an MCTS player (parameters N and C_p) against TerminatorHex over N_ROUNDS games and
    returns the conservative TrueSkill difference (MCTS minus TerminatorHex) together with the MCTS
    player's average move time in seconds."""
    if debug:
        print("Evaluating N=%d and C_p=%.3f" % (N, C_p))
    BOARD_SIZE = 6
    MAX_TURNS = BOARD_SIZE**2
    N_ROUNDS = 12

    terminator = TerminatorHex.TerminatorHex(2,
                                             do_transposition=False,
                                             max_time=max_time)
    terminator_player_move = terminator.terminator_move

    MCTS_AI = MCTSHex.MCTSHex(N,
                              C_p,
                              expansion_function=('constant', 1),
                              random_seed="random",
                              enh_WinScan=False,
                              enh_FreqVisitor=False,
                              enh_EnsureTopLevelExplr=False)

    terminator_rating = ts.Rating()
    mcts_rating = ts.Rating()

    average_time = []
    for game in range(N_ROUNDS):
        if debug:
            print("Currently playing game number %d of %d" %
                  (game + 1, N_ROUNDS))

        time_array = np.zeros((MAX_TURNS, 1))
        partial_move = partial(timeEvalMoveHook,
                               AI_move_func=MCTS_AI.MCTS_move,
                               timing_vector=time_array)

        if game % 2 == 0:
            mcts_color = HexBoard.BLUE
            terminator_color = HexBoard.RED
            blue_ai_move = partial_move
            red_ai_move = terminator_player_move
        else:
            mcts_color = HexBoard.RED
            terminator_color = HexBoard.BLUE
            blue_ai_move = terminator_player_move
            red_ai_move = partial_move

        board = HexBoard(BOARD_SIZE,
                         n_players=0,
                         enable_gui=False,
                         interactive_text=False,
                         ai_color=None,
                         ai_move=None,
                         blue_ai_move=blue_ai_move,
                         red_ai_move=red_ai_move,
                         move_list=[])
        winning_color = board.get_winning_color()

        if winning_color == mcts_color:
            mcts_rating, terminator_rating = ts.rate_1vs1(
                mcts_rating, terminator_rating)
        elif winning_color == terminator_color:
            terminator_rating, mcts_rating = ts.rate_1vs1(
                terminator_rating, mcts_rating)

        if mcts_color == HexBoard.BLUE:
            time_array = time_array[0::2]
        else:
            time_array = time_array[1::2]

        if debug:
            print("Average time was %.3f seconds" % np.mean(time_array))
        average_time.append(np.mean(time_array))
        mcts_trueskill = mcts_rating.mu - 3 * mcts_rating.sigma
        terminator_trueskill = terminator_rating.mu - 3 * terminator_rating.sigma
    return mcts_trueskill - terminator_trueskill, np.mean(average_time)
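get_elo_and_time wraps the MCTS move generator in partial(timeEvalMoveHook, AI_move_func=..., timing_vector=...), so timeEvalMoveHook is evidently a timing wrapper that forwards the move request and records how long it took. Its actual definition is not part of this listing; the version below is only a guess at its behaviour, under that assumption:

import time
import numpy as np

def timing_move_hook(*args, AI_move_func=None, timing_vector=None, **kwargs):
    # Hypothetical stand-in for timeEvalMoveHook: call the wrapped move generator,
    # measure the elapsed wall time, and store it in the first unused slot.
    start = time.time()
    move = AI_move_func(*args, **kwargs)
    elapsed = time.time() - start
    idx = int(np.count_nonzero(timing_vector))  # first slot that is still zero
    if idx < timing_vector.shape[0]:
        timing_vector[idx] = elapsed
    return move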
                if game % 2 == 0:
                    mcts_color = HexBoard.BLUE
                    terminator_color = HexBoard.RED
                    blue_ai_move = MCTS_AI.MCTS_move
                    red_ai_move = terminator_player_move
                else:
                    mcts_color = HexBoard.RED
                    terminator_color = HexBoard.BLUE
                    blue_ai_move = terminator_player_move
                    red_ai_move = MCTS_AI.MCTS_move

                board = HexBoard(BOARD_SIZE,
                                 n_players=0,
                                 enable_gui=False,
                                 interactive_text=False,
                                 ai_color=None,
                                 ai_move=None,
                                 blue_ai_move=blue_ai_move,
                                 red_ai_move=red_ai_move,
                                 move_list=[])
                winning_color = board.get_winning_color()

                if winning_color == mcts_color:
                    mcts_rating, terminator_rating = ts.rate_1vs1(
                        mcts_rating, terminator_rating)
                elif winning_color == terminator_color:
                    terminator_rating, mcts_rating = ts.rate_1vs1(
                        terminator_rating, mcts_rating)

                rating_list.append(terminator_rating.mu)
Example 14
import TerminatorHex
from HexBoard import HexBoard
"""Simple script to play a single-player game against the TerminatorHex AI; use the graphical
interface by setting enable_gui to True."""

if __name__ == '__main__':
    board_size = 4
    enable_gui = False

    if enable_gui:
        enable_text_interface = False
    else:
        enable_text_interface = True

    evaluator_function = TerminatorHex.random_score_heuristic  # Random evaluator function
    ai = TerminatorHex.TerminatorHex(3,
                                     use_suggested_heuristic=False,
                                     heuristic_evaluator=evaluator_function,
                                     depth_weighting=0,
                                     random_seed=10,
                                     do_iterative_deepening=False,
                                     max_time=None,
                                     do_transposition=False)
    move_generator = ai.terminator_move  # Move generator that uses alpha_beta search
    board = HexBoard(board_size,
                     n_players=1,
                     enable_gui=enable_gui,
                     interactive_text=enable_text_interface,
                     ai_move=move_generator,
                     ai_color=HexBoard.RED,
                     blue_ai_move=None,
                     red_ai_move=None,
                     move_list=[])
def alpha_beta(hex_board, depth, max_or_min, alpha, beta, evaluator, depth_weighting=0, transposition_table=None, TT_offset=0):
    """The minimax algorithm on the HexBoard with alpha-beta-pruning
        Args:
            hex_board (HexBoard): The current hex board.
            depth (int): maximum depth to search
            max_or_min (str): whether to maximise or minimise for the current color to play (hex_board.blue_to_move); either 'min' or 'max'.
            alpha, beta (float): current alpha and beta bounds; pass float('-inf') and float('inf') at the root.
            evaluator (function): evaluator function. Called with args hex_board, maximiser_color.
            depth_weighting (float): weight multiplied by the remaining depth and added to the leaf evaluation. This can be
                used to force immediate capitalisation on good moves.
            transposition_table: per-level transposition table (indexable by search level, each level a dict keyed by board hash),
                or None to disable transposition table usage.
            TT_offset (int): level index offset in the transposition table. This is necessary when using iterative deepening,
                i.e. when a search shallower than TerminatorHex.max_depth is performed.
        Returns:
            (float, float, float): the maximised/minimised value according to the evaluator, together with the updated alpha and beta bounds.
    """
    maximiser_color = [hex_board.BLUE, hex_board.RED][(max_or_min == 'max') ^ (hex_board.blue_to_move)]  # the maximiser is the color to move when max_or_min == 'max', otherwise its opponent
    is_game_over = False
    if (hex_board.check_win(hex_board.BLUE) or hex_board.check_win(hex_board.RED)):
        is_game_over = True

    use_transposition = transposition_table is not None
    if depth > 0:
        if use_transposition:
            moves = order_moves_TT(hex_board, max_or_min, transposition_table[depth + TT_offset])
        else:
            moves = hex_board.get_free_positions()
    else:
        moves = None  # don't need to compute this, save time

    # minimax with alpha-beta pruning:
    if (depth <= 0 or is_game_over or len(moves) == 0):  # end state
        value = evaluator(hex_board, maximiser_color) + (depth_weighting * depth)
        if use_transposition:
            transposition_table[depth + TT_offset][board_as_hash_key(hex_board)] = value  # store at the same shifted level used for lookups and stores elsewhere
        return (value, alpha, beta)
    elif (max_or_min == 'max'):  # maximise
        value = float('-inf')
        for move in moves:
            deepened_board = HexBoard(hex_board.board_size, n_players=2, enable_gui=False, interactive_text=False, ai_move=None, blue_ai_move=None, red_ai_move=None, move_list=hex_board.move_list)
            deepened_board.set_position_auto(move)
            new_value, _, _ = alpha_beta(deepened_board, depth - 1, 'min', alpha, beta, evaluator,
                                         depth_weighting=depth_weighting, transposition_table=transposition_table,
                                         TT_offset=TT_offset)
            if new_value > value:
                value = new_value
                if use_transposition:
                    transposition_table[depth + TT_offset][board_as_hash_key(hex_board)] = value
            alpha = max(alpha, new_value)
            if alpha >= beta:  # beta cutoff
                # print("beta", alpha, beta)
                break
        return (value, alpha, beta)
    elif (max_or_min == 'min'):  # minimise
        value = float('inf')
        for move in moves:
            deepened_board = HexBoard(hex_board.board_size, n_players=2, enable_gui=False, interactive_text=False, ai_move=None, blue_ai_move=None, red_ai_move=None, move_list=hex_board.move_list)
            deepened_board.set_position_auto(move)
            new_value, _, _ = alpha_beta(deepened_board, depth - 1, 'max', alpha, beta, evaluator,
                                         depth_weighting=depth_weighting, transposition_table=transposition_table,
                                         TT_offset=TT_offset)
            if new_value < value:
                value = new_value
                if use_transposition:
                    transposition_table[depth + TT_offset][board_as_hash_key(hex_board)] = value
            beta = min(beta, new_value)
            if alpha >= beta:  # alpha cutoff
                # print("alpha", alpha, beta)
                break
        return (value, alpha, beta)

    print("@alpha_beta: unknown max_or_min objective", max_or_min)
    return None
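alpha_beta can be driven directly (TT_offset=0) or from an iterative-deepening loop, where TT_offset shifts shallower passes so that every node is stored at the same table level it would occupy in the full-depth search. A hedged sketch of such a driver, reusing the per-level table layout assumed throughout this listing:

def iterative_deepening_sketch(hex_board, max_depth, evaluator):
    # Sketch only: search with increasing depth while sharing one per-level table,
    # so earlier (shallower) results seed the move ordering of later passes.
    transposition_table = [dict() for _ in range(max_depth + 1)]
    value = None
    for depth in range(1, max_depth + 1):
        TT_offset = max_depth - depth  # align this pass with the deepest one
        value, _, _ = alpha_beta(hex_board, depth, 'max',
                                 float('-inf'), float('inf'), evaluator,
                                 depth_weighting=0,
                                 transposition_table=transposition_table,
                                 TT_offset=TT_offset)
    return value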
Example 16
import numpy as np

import TerminatorHex
from HexBoard import HexBoard
""" Simple class to play a single player game against an AI using the GUI.
"""

if __name__ == '__main__':
    enable_GUI = True
    enable_interactive_text = True
    board_size = 5
    n_players = 1
    ai_color = HexBoard.BLUE

    terminator_AI = TerminatorHex.TerminatorHex(3, True)
    board = HexBoard(board_size,
                     n_players=n_players,
                     enable_gui=enable_GUI,
                     interactive_text=enable_interactive_text,
                     ai_move=terminator_AI.terminator_move,
                     ai_color=ai_color,
                     blue_ai_move=None,
                     red_ai_move=None,
                     move_list=[])
Example 17
import random
import time

import torch

# config, utils, HexBoard, Mapper, StateManager, ANET_NN_GENERAL and ANET_MODEL
# come from this project's own modules; their import lines are not part of this snippet.

if __name__ == "__main__":
    board_size = config.BOARD_SIZE
    exploration_bonus = config.EXPLORATION_BONUS
    num_simulations = config.NUM_SIMULATIONS
    num_episodes = config.NUM_EPISODES
    save_every = config.SAVE_EVERY
    folder_results = config.FOLDER_RESULTS
    lr = config.LEARNING_RATE

    utils.create_directory(folder_results)

    time_start = time.time()
    board = HexBoard(board_size)
    mapper = Mapper(board_size=board_size)
    state_manager = StateManager(board, mapper, verbose=config.VISUALIZE_MOVES)

    #anet_nn = ANET_NN_COMPLEX(input_size=board_size*board_size, output_size = board_size*board_size).float()
    anet_nn = ANET_NN_GENERAL(input_size=board_size * board_size,
                              output_size=board_size * board_size,
                              hidden_layers=config.HIDDEN_LAYERS,
                              activation=config.ACTIVATION).float()

    anet = ANET_MODEL(anet_nn,
                      max_cases_in_buffer=config.REPLAY_BUFFER_MAX_SIZE)
    optimizer = config.OPTIMIZERS[config.OPTIMIZER](anet_nn.parameters(),
                                                    lr=lr)

    for episode in range(num_episodes):