Exemplo n.º 1
0
def as_board(moves):
    """Flatten a DataFrame of recorded moves into training arrays.

    Replays every move on a fresh Game and snapshots the 7x6 board after
    each one.  Returns ``(states, labels)`` where ``states[idx]`` is the
    flattened board after move ``idx`` and ``labels[idx]`` holds
    ``(winner, player)`` for that move.
    """
    move_count = len(moves)
    states = np.empty((move_count, 7 * 6))
    labels = np.empty((move_count, 2))

    game = Game()
    for _, row in moves.iterrows():
        game.play_move(row['player'], row['move'])
        # Rows are stored at their recorded index, not iteration order.
        states[row['idx'], :] = game.board.reshape((-1))
        labels[row['idx'], 0] = row['winner']
        labels[row['idx'], 1] = row['player']

    return (states, labels)
Exemplo n.º 2
0
    def __init__(self, board=None, print_friendly=False):
        """Set up the wrapped Game, empty state log and player glyph table.

        board          : optional starting board handed to Game.
        print_friendly : if True, player 1 gets a distinct glyph so the two
                         players remain distinguishable without colours.
        """
        super().__init__()
        # Composition instead of inheritance: Game's internal deepcopy
        # breaks subclassing.
        self.game = Game(board=board)
        self.states = []

        self.model = None
        self.board_nodes = self.game.width * self.game.height
        # Network input = every board cell plus one slot for the player id.
        self.input_shape = (self.board_nodes + 1, )

        # Alternate characters that stay distinct without colours: ■ ▀ • ⦿
        self.print_friendly = print_friendly
        player_one_glyph = "•" if self.print_friendly else "■"
        self.__players = {
            -1: style("■", Colours.FG.YELLOW),
            0: " ",
            1: style(player_one_glyph, Colours.FG.RED)
        }
Exemplo n.º 3
0
    def _predict_move_probability(self, player, check_early_win=True):
        """Score every column with the model for `player`.

        Tries each legal column on a copy of the current board, asks the
        model for the win probability, and returns ``(column, probability)``
        for the best column found.  With `check_early_win` an immediately
        winning column short-circuits the search with probability 1.0.
        """
        best = (0, 0.0)

        print(self.print_player_string(player) + ": Testing cols: |", end="")

        for col in range(self.game.width):
            # Full columns are shown as 0.000 in red and skipped.
            if not self.game.is_legal_move(col):
                print(style(f" {0.0:.3f} |", Colours.FG.BRIGHT_RED), end="")
                continue

            trial = Game(board=self.game.board.copy())
            trial.play_move(player, col)

            if check_early_win and trial.status == player:
                # This column wins outright; no need to consult the model.
                print(" win", end="")
                best = (col, 1.0)
                break

            # Board plus player id, shifted by +1 so all inputs are positive.
            features = np.concatenate((trial.board.flatten(), [player]))
            net_input = features.reshape((1, *self.input_shape)) + 1
            # Output layout: [[player_0_prob, draw (?), player_1_prob]].
            probability = self.model.predict(net_input)[0][player + 1]

            if np.isnan(probability):
                raise Exception("Error: prediction is NaN?")

            print(f" {probability:.3f} |", end="")

            if probability > best[1]:
                best = (col, probability)

        print(
            f"  => Predicted move at col {best[0]} with {best[1] * 100:.2f}%"
        )
        return best
Exemplo n.º 4
0
def humanvmodel(sess, X, Y, humanfirst=False):
    """Play an interactive console game: trained network vs. a human.

    sess       : TensorFlow session holding the trained weights.
    X, Y       : input placeholder and output tensor of the network.
    humanfirst : if True, the human makes the opening move.

    Move return codes (from Game.move): 0.0 = game continues,
    1000.0 = draw, any other non-zero = someone won.
    """

    game = Game()

    if humanfirst:
        game.printState()
        # NOTE(review): this return value is never inspected — the opening
        # move cannot end the game in Connect Four, but an illegal input
        # here would go unnoticed. Confirm Game.move handles that.
        check = game.move(int(input("You go first...")))

    while True:
        # Feed the board from the model's perspective: its own pieces
        # first, the opponent's second.
        if humanfirst:
            pos = np.concatenate((game.noughts, game.crosses))
        else:
            pos = np.concatenate((game.crosses, game.noughts))
        out = sess.run(Y, feed_dict={X: pos})
        # Greedy move selection: best-scoring column among empty spots.
        bestprob, bestmove = -10000.0, 0
        for i in range(0, len(out)):
            if out[i] > bestprob and game.spots[0, i] == 0.0:
                bestprob = out[i]
                bestmove = i
        print(bestprob)
        check = game.move(bestmove)
        game.printState()
        if check != 0.0:
            # NOTE(review): unlike the human branch below, this does not
            # distinguish a model win from a draw.
            print("Game over!")
            break
        check = game.move(int(input("Your turn...")))
        if check != 0.0:
            game.printState()
            if check == 1000.0:
                print("It's a draw!")
            else:
                print("You win!")
            break
Exemplo n.º 5
0
def train(sess, X, Y, optimizer, cost, boards, outcomes, ends, wons, moves,
          explore_rate, memsize, batchsize, saver, directory, chckptrate,
          Ytest):
    """Self-play training loop with an experience-replay memory.

    Crosses and noughts alternately pick moves from the network (with
    exploration), every transition is appended to a fixed-size replay
    memory, and once the memory is full each step also trains on a random
    mini-batch.  Every `chckptrate` iterations the average Q-max on a
    held-out sample is printed and a checkpoint is saved.  Runs forever.

    Move return codes (from Game.move): 0.0 = continue, 1.0 = crosses win,
    -1.0 = noughts win, 1000.0 = draw.
    """

    # Replay memory: board, successor board, terminal flag, reward, move.
    # 84 presumably = two stacked 42-cell planes (crosses, noughts) —
    # matches the concatenations below; confirm against Game.
    boardm = np.zeros((memsize, 84))
    outcomem = np.zeros((memsize, 84))
    endm = np.zeros((memsize, 1))
    wonm = np.zeros((memsize, 1))
    movem = np.zeros((memsize, 1), dtype=int)
    # -1 marks an unused slot; training starts once the last slot is filled.
    movem = movem - 1
    test = np.zeros((memsize, 84))
    game = Game()
    firsttrain = True

    counter = 0
    # True while a noughts move is "in progress", i.e. its outcome
    # (the board after crosses replies) is not yet known.
    noughtmoveip = False
    boardtemp = np.zeros((1, 84))
    movetemp = 0
    boardtemp2 = np.zeros((1, 84))
    movetemp2 = 0
    avcost = 0

    print("Beginning training...")

    while True:
        for i in range(0, chckptrate):

            # Crosses move: board from crosses' perspective.
            boardtemp = np.concatenate((game.crosses, game.noughts)).T

            out = sess.run(Y, feed_dict={X: boardtemp.T})
            movetemp = findbestmove(out, game, explore_rate)
            check = game.move(movetemp)
            #game.printState()
            #input("Press any key")

            if check == 1.0:
                # Crosses won: terminal transition, reward +1.
                counter = addtomemory(boardm, outcomem,
                                      endm, wonm, movem, boardtemp,
                                      np.zeros((1, 84)), 1.0, 1.0, movetemp,
                                      counter)
                if noughtmoveip:
                    # The pending noughts move led to this loss: reward -1.
                    counter = addtomemory(boardm, outcomem, endm,
                                          wonm, movem, boardtemp2,
                                          np.zeros((1, 84)), 1.0, -1.0,
                                          movetemp2, counter)
                game.reset()
                noughtmoveip = False
                #print("Crosses won")
            if check == 1000.0:
                # Draw after crosses' move: terminal, reward 0 for both.
                counter = addtomemory(boardm, outcomem,
                                      endm, wonm, movem, boardtemp,
                                      np.zeros((1, 84)), 1.0, 0.0, movetemp,
                                      counter)
                if noughtmoveip:
                    counter = addtomemory(boardm, outcomem, endm,
                                          wonm, movem, boardtemp2,
                                          np.zeros((1, 84)), 1.0, 0.0,
                                          movetemp2, counter)
                game.reset()
                noughtmoveip = False
                #print("Draw")

            if check == 0.0:

                if noughtmoveip:
                    # Close out the previous noughts transition: its
                    # successor state is the board after crosses replied.
                    counter = addtomemory(
                        boardm, outcomem, endm, wonm, movem, boardtemp2,
                        np.concatenate((game.noughts, game.crosses)).T, 0.0,
                        0.0, movetemp2, counter)

                noughtmoveip = True
                # Noughts move: board from noughts' perspective.
                boardtemp2 = np.concatenate((game.noughts, game.crosses)).T

                out = sess.run(Y, feed_dict={X: boardtemp2.T})
                movetemp2 = findbestmove(out, game, explore_rate)
                check2 = game.move(movetemp2)
                #game.printState()
                #input("Press any key")

                if check2 == -1.0:
                    # Noughts won: crosses' last move gets -1, noughts' +1.
                    counter = addtomemory(boardm, outcomem, endm,
                                          wonm, movem, boardtemp,
                                          np.zeros((1, 84)), 1.0, -1.0,
                                          movetemp, counter)
                    counter = addtomemory(boardm, outcomem, endm,
                                          wonm, movem, boardtemp2,
                                          np.zeros((1, 84)), 1.0, 1.0,
                                          movetemp2, counter)
                    game.reset()
                    noughtmoveip = False
                    #print("Noughts won")
                if check2 == 1000.0:
                    # Draw after noughts' move: both terminal with reward 0.
                    counter = addtomemory(boardm, outcomem, endm,
                                          wonm, movem, boardtemp,
                                          np.zeros((1, 84)), 1.0, 0.0,
                                          movetemp, counter)
                    counter = addtomemory(boardm, outcomem, endm,
                                          wonm, movem, boardtemp2,
                                          np.zeros((1, 84)), 1.0, 0.0,
                                          movetemp2, counter)
                    game.reset()
                    noughtmoveip = False
                    #print("Draw")
                if check2 == 0.0:
                    # Game continues: store crosses' non-terminal transition
                    # with the board after noughts replied as successor.
                    counter = addtomemory(
                        boardm, outcomem, endm, wonm, movem, boardtemp,
                        np.concatenate((game.crosses, game.noughts)).T, 0.0,
                        0.0, movetemp, counter)

            # Train only once the replay memory has been filled completely.
            if movem[memsize - 1] != -1:

                if firsttrain:
                    # Fix a 1000-board evaluation sample the first time
                    # the memory is full.
                    test = np.copy(boardm)
                    test = test[np.random.choice(np.arange(memsize),
                                                 size=1000,
                                                 replace=False)]
                    firsttrain = False

                # Random mini-batch from replay memory.
                sample = np.random.choice(np.arange(memsize),
                                          size=batchsize,
                                          replace=False)
                boardsample = boardm[sample]
                outcomesample = outcomem[sample]
                endsample = endm[sample]
                wonsample = wonm[sample]
                movesample = movem[sample]

                _, batchcost = sess.run(
                    [optimizer, cost],
                    feed_dict={
                        boards: boardsample.T,
                        outcomes: outcomesample.T,
                        ends: endsample.T,
                        wons: wonsample.T,
                        moves: movesample.T
                    })
                # Exponential moving average of the batch cost.
                avcost = 0.9 * avcost + 0.1 * batchcost

        if not firsttrain:
            # Periodic progress report and checkpoint.
            testvalue = np.average(
                sess.run(Ytest, feed_dict={outcomes: test.T}))
            print("Counter is " + str(counter) +
                  " and average Qmax on test set is " + str(testvalue))
            saver.save(sess, directory)
Exemplo n.º 6
0
# -*- coding: utf-8 -*-
"""
Created on Wed May 24 21:22:30 2017

@author: Toby
"""

from connectfour import Game

# Interactive two-human game loop.
# Game.move return codes (consistent with the rest of this project):
# 0.0 = game continues, 1000.0 = draw, any other value = winning player.
turncounter = 1

game = Game()

while True:
    game.printState()
    check = game.move(int(input("No winner yet...")))
    if check != 0.0:
        game.printState()
        # Fix: a full board returns 1000.0 (a draw) — previously this
        # printed "Player 1000.0 has won!".
        if check == 1000.0:
            print("It's a draw!")
        else:
            print("End of Game! Player " + str(check) + " has won!")
        break
    turncounter = turncounter + 1
Exemplo n.º 7
0
 def reset(self):
     """Start over: drop the recorded states and begin a fresh Game."""
     self.states = []
     self.game = Game()
Exemplo n.º 8
0
class MLC4:
    """Machine-learning Connect Four player wrapping a Game instance.

    Holds a Keras model that scores candidate moves, records the states of
    played games for later export, and offers play loops against a random
    player, a "smart" player, or the model itself.

    Player encoding: -1 and 1 are the two players, 0 is an empty cell /
    draw.  Game.status is None while the game is running and the winning
    player (or 0) afterwards.
    """

    def __init__(self, board=None, print_friendly=False):
        """Create the wrapped Game, empty state log and player glyphs.

        board          : optional starting board passed through to Game.
        print_friendly : if True, use a distinct glyph for player 1 so
                         the players stay distinguishable without colours.
        """
        super().__init__()
        self.game = Game(
            board=board)  # Cannot do inheritance due to deepcopy in Game...
        self.states = []

        self.model = None
        self.board_nodes = self.game.width * self.game.height
        # Network input: one node per board cell plus one for the player id.
        self.input_shape = (self.board_nodes + 1, )

        # If True, use another character for the other player so they are distinct
        # when printing without colours, e.g. ■ ▀ • ⦿
        self.print_friendly = print_friendly
        self.__players = {
            -1: style("■", Colours.FG.YELLOW),
            0: " ",
            1: style("•" if self.print_friendly else "■", Colours.FG.RED)
        }

    def __str__(self):
        """Render the board as a box-drawing grid with column indices."""
        top_row = "┌" + ("─" * 3 + "┬") * (self.game.width - 1) + "─" * 3 + "┐"
        sep_row = "╞" + ("═" * 3 + "╪") * (self.game.width - 1) + "═" * 3 + "╡"
        bot_row = "└" + ("─" * 3 + "┴") * (self.game.width - 1) + "─" * 3 + "┘"

        # Rows are printed top-down, hence reversed().
        return top_row \
             + "\n" + "\n".join(f"│ {f' │ '.join(map(self.print_player, li))} │" for li in reversed(self.game.board)) \
             + "\n" + sep_row \
             + "\n│ " + f' │ '.join(map(str, range(self.game.width))) + " │" \
             + "\n" + bot_row

    def _add_state(self, game, player, move):
        """Record a (player, move) pair and print the board after it."""
        if player is not None and move is not None:
            self.states.append((player, move))
            print(self)

    def print_states(self, game_id=0, fp=None):
        """Dump the recorded moves as CSV rows: game, idx, player, move, winner."""
        for idx, move in enumerate(self.states):
            print(game_id,
                  idx,
                  move[0],
                  move[1],
                  self.game.status,
                  sep=",",
                  file=fp or sys.stdout)

    def print_player(self, p):
        """Return the display glyph for player p (empty glyph if unknown)."""
        return self.__players.get(p, self.__players[0])

    def print_player_string(self, p):
        """Human-readable player label, e.g. 'Player 1 ■' or 'Player DRAW'."""
        if p == 0:
            return "Player DRAW"
        # Maps -1 -> "Player 1", 1 -> "Player 2".
        return f"Player {max(0, p) + 1} " + self.print_player(p)

    def reset(self):
        """Discard the current game and recorded states."""
        self.game = Game()
        self.states = []

    def play_original_vs_random(self, starting=None, legal_only=True, n=100):
        """Delegate to Game's built-in random-vs-random play."""
        # Dummy call original play function
        return self.game.random_play(starting, legal_only, self._add_state)

    def play_original_vs_smart(self, starting=None, legal_only=True, n=100):
        """Delegate to Game's built-in smart-play routine."""
        # Dummy call original play function
        return self.game.smart_play(starting, legal_only, n, self._add_state)

    def _start_game(self, player, other_strat_descr="plays randomly"):
        """Print the game banner and record the initial (empty) state."""
        print("-" * 80 + f"\n\nStarting game with {self.print_player_string(player)}. " \
            + f"AI player is {self.print_player_string(-1)}." \
            + f" {self.print_player(1)} {other_strat_descr}.")
        self._add_state(self.game, None, None)

    def play_vs_random(self,
                       starting=None,
                       legal_only=True,
                       check_early_win=True,
                       prevent_other_win=True):
        """Play model (player -1) against a random-move opponent.

        Returns Game.status: the winning player, or 0 for a draw.
        """
        # Against random player
        player = starting if starting is not None else starting_player()
        self._start_game(player, other_strat_descr="plays randomly")

        while self.game.status is None:
            # Model moves only if it is loaded; otherwise both play randomly.
            if player < 0 and self.model:
                move = self.predict(check_early_win=check_early_win,
                                    prevent_other_win=prevent_other_win)
            else:
                move = self.game.random_action(legal_only=legal_only)

            print(
                f"{self.print_player_string(player)} adds to column {move}...")
            self.game.play_move(player, move)
            self._add_state(self.game, player, move)
            player = player * -1

        print(f"{self.print_player_string(self.game.status)} wins!")
        return self.game.status

    def play_vs_smart(self,
                      starting=None,
                      legal_only=True,
                      n=100,
                      check_early_win=True,
                      prevent_other_win=True):
        """Play model (player -1) against Game's 'smart' opponent.

        n is forwarded to Game.smart_action (presumably a simulation
        count — confirm against Game).  Returns Game.status.
        """
        # Against smart player
        player = starting if starting is not None else starting_player()
        self._start_game(player, other_strat_descr="plays smart")

        while self.game.status is None:
            if player < 0 and self.model:
                move = self.predict(check_early_win=check_early_win,
                                    prevent_other_win=prevent_other_win)
            else:
                move, p = self.game.smart_action(player,
                                                 legal_only=legal_only,
                                                 n=n)
                # Flag (but still play) illegal smart-player moves.
                if not self.game.is_legal_move(move):
                    print(
                        style("Illegal move smart player! ",
                              Colours.FG.BRIGHT_RED), player, move)

            print(
                f"{self.print_player_string(player)} adds to column {move}...")
            self.game.play_move(player, move)
            self._add_state(self.game, player, move)
            player = player * -1

        print(f"{self.print_player_string(self.game.status)} wins!")
        return self.game.status

    def play_vs_ai(self,
                   starting=None,
                   legal_only=True,
                   check_early_win=True,
                   prevent_other_win=True,
                   random_move_chance=0.0):
        """Play the model against itself.

        random_move_chance gives player 1 a chance of playing a random
        move instead of the model's, for variety.  Returns Game.status.
        """
        # Against own model
        player = starting if starting is not None else starting_player()
        self._start_game(player, other_strat_descr="plays also as AI")

        while self.game.status is None:
            if player > 0 and random_move_chance > 0.0 and random.random(
            ) <= random_move_chance:
                move = self.game.random_action(legal_only=legal_only)
            else:
                move = self.predict(ai_player=player,
                                    check_early_win=check_early_win,
                                    prevent_other_win=prevent_other_win)

            # Flag (but still play) illegal predicted moves.
            if not self.game.is_legal_move(move):
                print(
                    style("Illegal move from player! ", Colours.FG.BRIGHT_RED),
                    player, move)

            print(
                f"{self.print_player_string(player)} adds to column {move}...")
            self.game.play_move(player, move)
            self._add_state(self.game, player, move)
            player = player * -1

        print(f"{self.print_player_string(self.game.status)} wins!")
        return self.game.status

    ###########################################################################

    def has_model(self):
        """Return True if a model has been loaded or built."""
        return self.model is not None

    def load_existing_model(self, name, basepath="../data/models/"):
        """Load a saved Keras model; on failure, clear self.model and report."""
        try:
            self.model = load_model(f"{basepath}{name}", compile=True)
            # Dummy prediction to warm up the predictor.
            self.model.predict(np.zeros(
                (1, *self.input_shape)))  # Init predictor
        except Exception as e:
            self.model = None
            print(style(f"Could not load model!\n{e}", Colours.FG.RED))
        else:
            self.model.summary()

    def build_network(self, name="", learning_rate=0.001):
        """
        Input : self.width * self.height board (42 squares) + player

            https://keras.io/api/layers/activations/#relu-function
            https://keras.io/api/layers/activations/#softmax-function
            https://keras.io/api/losses/probabilistic_losses/#sparsecategoricalcrossentropy-class
            https://keras.io/api/optimizers/Nadam/
            https://keras.io/api/metrics/accuracy_metrics/#accuracy-class

        Output: 2 => [[player_0_prob, player_1_prob]]
        """
        print(f"Building model{(' ' + name) if name else ''}...")

        self.model = keras.Sequential(name=name or None)

        # Input layer
        self.model.add(
            Dense(self.input_shape[0], input_dim=self.input_shape[0]))

        # One or more large layers
        self.model.add(Dense(64, activation='relu'))
        self.model.add(Dense(256, activation='relu'))
        self.model.add(Dense(256, activation='relu'))
        # self.model.add(Dense(64, activation='relu'))

        # Smaller ending layer
        self.model.add(Dense(self.board_nodes, activation='relu'))
        # self.model.add(Dense(self.game.width, activation='relu'))

        # Output end layer
        self.model.add(Dense(3, activation='softmax'))

        self.model.compile(loss=SparseCategoricalCrossentropy(),
                           optimizer=Nadam(learning_rate=learning_rate),
                           metrics=["accuracy"])

        self.model.summary()

    def prepare_data(self, input_file, train_ratio=0.8):
        """Load a .npy dataset and split it into train/test tuples.

        Expects rows of (board:42, winner:1, player:1).  Returns
        ((X_train, Y_train), (X_test, Y_test)) with all values shifted
        by +1 so they are non-negative.
        """
        print("Preparing data...")

        # Read data, should contain [ (board:42, winner:1, player:1), ... ]
        data = np.load(input_file)

        # Board and player values are made positive by adding one
        data += 1

        Y = data[:, -2:-1]  # (winner), result is 0 or 2
        X = np.delete(
            data, -2,
            1)  # Drop second to last column, result = (board state, player)

        size = int(train_ratio * X.shape[0])

        X_train, X_test, Y_train, Y_test = X[:size], X[size:], Y[:size], Y[
            size:]

        print("Data loaded.")
        return (X_train, Y_train), (X_test, Y_test)

    def train(self,
              train_data,
              test_data,
              epochs=10,
              batch_size=200,
              show_plot=False,
              save_plot_path=""):
        """Fit the model, report accuracies and optionally plot the history.

        train_data/test_data : (X, Y) tuples as produced by prepare_data.
        show_plot            : display the loss/accuracy plot.
        save_plot_path       : if non-empty, save the plot to this path.
        """
        print("Training model...")

        train_x, train_y = train_data

        hist = self.model.fit(train_x,
                              train_y,
                              validation_data=test_data,
                              shuffle=True,
                              epochs=epochs,
                              batch_size=batch_size)

        test_score, test_acc = self.model.evaluate(test_data[0],
                                                   test_data[1],
                                                   verbose=0)

        print(style("Final accuracy on training set : ", Colours.FG.MAGENTA) \
            + style(f"{hist.history['accuracy'][-1] * 100:.2f}%", Colours.FG.BRIGHT_MAGENTA))
        print(style("Average accuracy while training: ", Colours.FG.MAGENTA) \
            + style(f"{np.average(np.array(hist.history['val_accuracy'])) * 100:.2f}%", Colours.FG.BRIGHT_MAGENTA))
        print(style("Average accuracy on test set   : ", Colours.FG.MAGENTA) \
            + style(f"{test_acc * 100:.2f}% (score={test_score:.4f})", Colours.FG.BRIGHT_MAGENTA))

        if show_plot or save_plot_path:
            plt.style.use("ggplot")
            plt.figure()
            plt.plot(
                np.arange(0, epochs),
                [1.0] * epochs,
                "r--",
                label="Accuracy target",
            )
            plt.plot(np.arange(0, epochs),
                     hist.history["loss"],
                     "cyan",
                     label="train_loss")
            plt.plot(np.arange(0, epochs),
                     hist.history["val_loss"],
                     "blue",
                     label="val_loss")
            plt.plot(np.arange(0, epochs),
                     hist.history["accuracy"],
                     "yellow",
                     label="train_acc")
            plt.plot(np.arange(0, epochs),
                     hist.history["val_accuracy"],
                     "orange",
                     label="val_acc")

            # Optionally plot other metrics?
            for k, v in hist.history.items():
                if k not in ("loss", "val_loss", "accuracy", "val_accuracy"):
                    plt.plot(np.arange(0, epochs), v, label=k)

            plt.title("Training Loss and Accuracy")
            plt.xlabel("Epoch #")
            plt.ylabel("Loss/Accuracy")
            plt.legend(loc="lower left")
            plt.xlim(0, epochs - 1)
            plt.ylim(bottom=0)
            if save_plot_path: plt.savefig(save_plot_path)
            if show_plot: plt.show()

        self.model.predict(np.zeros((1, *self.input_shape)))  # Init predictor

    def save_model(self,
                   name="trained_1",
                   basepath="../data/models/",
                   save_structure=False):
        """Save the model under basepath/name, optionally with a JSON structure dump."""
        # if not name.endswith(".h5"):
        #     name += ".h5"
        print(f"Saving model to: '{basepath}{name}'...")

        if not os.path.exists(basepath):
            os.makedirs(basepath)

        if save_structure:
            model_json = self.model.to_json()
            with open(f"{basepath}{name}.json", "w") as fp:
                fp.write(model_json)

        save_model(self.model, f"{basepath}{name}")
        print("Saving complete.")

    def _predict_move_probability(self, player, check_early_win=True):
        """Return (column, win_probability) for the model's best column.

        Tries every legal column on a copy of the board; with
        check_early_win an immediately winning column short-circuits
        with probability 1.0.
        """
        # Predict chance of winning for each move
        # and return column with highest chance.
        max_probability = (0, 0.0)

        print(self.print_player_string(player) + ": Testing cols: |", end="")

        for move in range(self.game.width):
            # Full columns are printed as 0.000 in red and skipped.
            if not self.game.is_legal_move(move):
                print(style(f" {0.0:.3f} |", Colours.FG.BRIGHT_RED), end="")
                continue

            test_game = Game(board=self.game.board.copy())
            test_game.play_move(player, move)

            if check_early_win and test_game.status == player:
                # Win reached
                print(" win", end="")
                max_probability = (move, 1.0)
                break

            # Get prediction for move (make board positive by adding 1)
            test_input = np.concatenate(
                (test_game.board.flatten(), [player])).reshape(
                    (1, *self.input_shape)) + 1
            prediction = self.model.predict(test_input)[0][
                player + 1]  # [[player_0_prob, draw (?), player_1_prob]]

            if np.isnan(prediction):
                raise Exception("Error: prediction is NaN?")

            print(f" {prediction:.3f} |", end="")

            if prediction > max_probability[1]:
                max_probability = (move, prediction)

        print(
            f"  => Predicted move at col {max_probability[0]} with {max_probability[1] * 100:.2f}%"
        )
        return max_probability

    def predict(self,
                ai_player=-1,
                check_early_win=True,
                prevent_other_win=True):
        """Choose a column for ai_player.

        Picks the model's best column; with prevent_other_win, blocks the
        opponent's best column instead when the opponent's win chance is
        higher than the AI's.
        """
        # Get (move, chance) that AI wins
        ai_move = self._predict_move_probability(ai_player, check_early_win)

        if prevent_other_win:
            other_player_move = self._predict_move_probability(
                ai_player * -1, check_early_win)

            if other_player_move[1] > ai_move[1]:
                print(style("Trying to prevent", Colours.FG.BRIGHT_RED) \
                    + f" {self.print_player_string(ai_player * -1)} from winning...")
                return other_player_move[0]

        return ai_move[0]