Exemplo n.º 1
0
class BigDataAgent(ExpectiMaxAgent):
    """ExpectiMax agent that logs (board, suggested direction) samples to CSV."""

    def auto_log(self, data_dir="./data/", max_iter=1000, acc=1):
        """Play games and write one CSV row per step.

        Each row holds the 16 board cells, looked up through ``map_table``,
        followed by the direction returned by ``self.step()``.

        Args:
            data_dir: Prefix joined directly (string concatenation) with a
                timestamped ``.csv`` file name.
            max_iter: Number of rows (steps) to record.
            acc: Simulated accuracy of the player actually moving the tiles.
                With probability ``1 - (4 * acc - 1) / 3`` the executed move
                is replaced by a uniformly random one; the logged direction
                is always the one returned by ``self.step()``.
        """
        filename = data_dir + datetime.datetime.now().strftime(
            '%y%m%d_%H%M%S_%f') + ".csv"
        print("文件保存到:", filename)
        # Simulate the current accuracy: acc = x + (1 - x) / 4, solved for x
        # (e.g. acc = 0.72 gives x of about 0.63).
        acc_th = (4 * acc - 1) / 3
        # newline="" is required by the csv module; without it the writer's
        # own "\r\n" terminators get translated again on some platforms.
        with open(filename, "w", newline="") as csvfile:
            writer = csv.writer(csvfile)
            n_iter = 0
            while n_iter < max_iter:
                if self.game.end:
                    # Previous game is over: start a fresh one.
                    self.game = Game(4, score_to_win=2048, random=False)
                direction = self.step()
                row = [map_table[int(s)] for s in self.game.board.flatten()]
                row.append(direction)
                writer.writerow(row)

                # Occasionally play a random move instead of the logged one.
                if random.random() > acc_th:
                    direction = random.randrange(4)
                self.game.move(direction)
                n_iter += 1
Exemplo n.º 2
0
def data_generator_for_CRNN(score_to_begin, score_to_win, batch_size):
    """Endlessly yield ``(datas, labels)`` batches labelled by ExpectiMaxAgent.

    A sample is recorded only while ``game.score >= score_to_begin``. Each
    recorded board comes from ``board2array(game)``, has its axes reordered
    (swap 1<->2, then 0<->1) and is reshaped to ``(16, 4, 4, 1)``. Samples
    accumulate across games until ``batch_size`` of them are collected.

    Yields:
        Tuple of two numpy arrays: stacked boards and stacked
        ``step2array(step)`` labels.
    """
    batch_x, batch_y = [], []
    while True:
        game = Game(score_to_win=score_to_win, random=False)
        teacher = ExpectiMaxAgent(game)
        while game.end == 0:
            move = teacher.step()
            if game.score >= score_to_begin:
                planes = np.swapaxes(board2array(game), 1, 2)
                batch_x.append(
                    np.swapaxes(planes, 0, 1).reshape((16, 4, 4, 1)))
                batch_y.append(step2array(move))
            game.move(move)
            # The two lists grow in lockstep, so one length check suffices.
            if len(batch_x) == batch_size:
                yield (np.array(batch_x), np.array(batch_y))
                batch_x, batch_y = [], []
Exemplo n.º 3
0
    def step(self):
        """Choose a direction from the rankings returned by ``self.Board8``.

        Candidates are tried from the highest ``Pcount`` downwards (ties go
        to the larger ``P``). A candidate is accepted as soon as playing it on
        a scratch game changes the board. After four failed attempts the
        argmax of the (by then overwritten) ``Pcount`` is used.

        Returns:
            ``3 - select``, where ``select`` is the chosen candidate index.
        """
        current = self.game.board
        grid = [[current[r][c] for c in range(4)] for r in range(4)]
        P, Pcount = self.Board8(grid)
        attempts = 0
        while True:
            attempts += 1
            select, best = -1, -1
            for idx in range(4):
                votes = Pcount[idx]
                # Higher count wins; an equal count is broken by larger P.
                if best < votes or (best == votes and P[select] < P[idx]):
                    best, select = votes, idx
            # Mark this candidate as used so the next pass picks another one.
            Pcount[select] = -1
            trial = Game(4, enable_rewrite_board=True)
            trial.board = current
            trial.move(3 - select)
            if not (current == trial.board).all():
                break
            if attempts == 4:
                select = np.argmax(Pcount)
                break

        return (3 - select)
Exemplo n.º 4
0
def data_generator_for_CNN(score_to_begin, score_to_win, batch_size):
    """Endlessly yield ``(datas, labels)`` batches labelled by ExpectiMaxAgent.

    A sample (``board2array(game)``, ``step2array(step)``) is recorded only
    while ``game.score >= score_to_begin``. Samples accumulate across games
    until ``batch_size`` of them are collected, then one batch is yielded.

    Yields:
        Tuple of two numpy arrays: stacked boards and stacked labels.
    """
    boards, targets = [], []
    while True:
        game = Game(score_to_win=score_to_win, random=False)
        teacher = ExpectiMaxAgent(game)
        while game.end == 0:
            move = teacher.step()
            if game.score >= score_to_begin:
                boards.append(board2array(game))
                targets.append(step2array(move))
            game.move(move)
            # Both lists always have the same length.
            if len(boards) == batch_size:
                yield (np.array(boards), np.array(targets))
                boards, targets = [], []
Exemplo n.º 5
0
    def self_test(self):
        """Play 1000 games with ``self.model`` and print summary statistics.

        Prints the average final score, how many games reached each score
        threshold (16 ... 2048), and the mean wall-clock time spent per step
        on encoding the board plus running the model prediction.
        """
        import time

        thresholds = [2048, 1024, 512, 256, 128, 64, 32, 16]

        total_time = 0
        n_steps = 0
        stat = {s: 0 for s in thresholds}
        total = 0
        for i in range(1000):

            if i % 10 == 0:
                print("Test: ", i)

            game = Game(4, 2048)
            while not game.end:
                # time.clock() was removed in Python 3.8; perf_counter() is
                # the documented replacement for interval timing.
                start = time.perf_counter()
                oht = self.one_hot(game.board)
                direction = self.model.predict(oht[np.newaxis, :, :, :])
                move = direction.argmax()
                total_time += time.perf_counter() - start
                n_steps += 1

                game.move(move)
            total += game.score

            for s in thresholds:
                if game.score >= s:
                    stat[s] += 1

            if i % 10 == 0:
                print("Test: ", i)
                print("Score: ", game.score)

        print("Average Score currently is: ", float(total) / 1000.0)
        print("stat: ", stat)
        # Guard against the degenerate case where no step was ever played.
        per_step = float(total_time) / float(n_steps) if n_steps else 0.0
        print("Time for one step (x second/step): ", per_step)
Exemplo n.º 6
0
def data_generator(batch_size):
    """Endlessly yield ``(datas, labels)`` batches labelled by ExpectiMaxAgent.

    Every step of every game contributes one sample: the board divided by 11,
    horizontally stacked with its own transpose, paired with
    ``step2array(step)``. A batch is yielded each time ``batch_size`` samples
    have been collected; collection continues across game boundaries.
    """
    features, targets = [], []
    while True:
        game = Game(score_to_win=2048, random=False)
        teacher = ExpectiMaxAgent(game)
        while game.end == 0:
            move = teacher.step()
            scaled = game.board / 11
            features.append(np.hstack((scaled, scaled.T)))
            targets.append(step2array(move))
            game.move(move)
            # Both lists always have the same length.
            if len(features) == batch_size:
                yield (np.array(features), np.array(targets))
                features, targets = [], []
Exemplo n.º 7
0
def get_grids_next_step(grid):
    """Return the boards obtained by applying each of the 4 moves to ``grid``.

    Args:
        grid: Board to expand; it is copied for every move, so the caller's
            object is not handed to the scratch games directly.

    Returns:
        List of four boards, index ``k`` being the board read back from a
        scratch ``Game`` after ``move(k)``. If a move raises, the board as it
        stands in the scratch game at that point is returned for it.
    """
    grids_list = []

    for movement in range(4):
        env1 = Game(4, random=False, enable_rewrite_board=True)
        env1.board = grid.copy()
        try:
            env1.move(movement)
        except Exception:
            # Best effort: a failing move still contributes a board. Only
            # ordinary errors are swallowed, so Ctrl-C / SystemExit propagate.
            pass
        grids_list.append(env1.board)

    return grids_list
Exemplo n.º 8
0
class Env2048(object):
    """Small environment-style wrapper around a ``Game`` instance."""

    def __init__(self, score_to_win=2048, dim=4, base=2, state=None):
        """Create a new game.

        Args:
            score_to_win: Passed to ``Game`` and reused by ``reset``.
            dim: Board size, passed to ``Game`` as ``size``.
            base: Stored as ``base_``; not otherwise used in this class.
            state: Accepted for interface compatibility; currently unused.
        """
        self.game = Game(size=dim, score_to_win=score_to_win)
        self.dim_ = dim
        self.base_ = base
        self.start_tiles_ = 2

        self.score_to_win = score_to_win

    def __str__(self):
        """Render the board as an ASCII grid; non-positive cells are blank."""
        def cell(num):
            return '%5d' % (num) if num > 0 else ' ' * 5

        demarcation = ('+' + '-' * 5) * self.dim_ + '+\n'
        rows = [
            '|' + '|'.join([cell(num) for num in row]) + '|\n'
            for row in self.game.board
        ]
        return demarcation + demarcation.join(rows) + demarcation

    def __repr__(self):
        # Previously called self.__str__(self), which raised TypeError.
        return self.__str__()

    def set_state(self, state):
        """Assign ``state`` to the underlying game's board."""
        self.game.board = state

    def get_state(self):
        """Return the underlying game's board."""
        return self.game.board

    def to_tensor(self):
        """Return ``state2tensor`` applied to the current board."""
        return state2tensor(self.game.board)

    def reset(self):
        """Start a fresh game with the stored size/target; return its board."""
        self.game = Game(size=self.dim_, score_to_win=self.score_to_win)
        return self.game.board

    def step(self, action):
        """Apply ``action`` and return ``(board, reward, end_status, '')``.

        The original comment lists actions as 0 left, 1 down, 2 right, 3 up.
        Reward is 20 when the end status is 2, 4 when it is 0, -8 otherwise.
        """
        self.game.move(action)

        status = self.is_terminate()
        if status == 2:
            reward = 20
        elif status == 0:
            reward = 4
        else:
            reward = -8
        return self.game.board, reward, status, ''

    def get_return(self):
        """Return the underlying game's score."""
        return self.game.score

    def is_terminate(self):
        """Return the underlying game's ``end`` status."""
        return self.game.end
Exemplo n.º 9
0
    def learn_from_dataset_from_master(self, L, R, group=100000):
        """Build a teacher-labelled dataset for scores in (L, R] and train on it.

        Games are driven entirely by ``board_to_move``. A position is recorded
        when ``L < game.score <= R``; a game is abandoned once its score
        exceeds ``R``. ``group`` games feed the training set and
        ``int(group / 10)`` games feed the test set. Both sets are saved under
        ``./dataset/``. The model is then fitted, saved, evaluated, and its
        average score over 1000 self-played games is printed.
        """
        from .expectimax import board_to_move
        self.tch_search_fun = board_to_move

        print("Training: [L, R] = ", L, R)

        def collect(n_games, report_progress):
            """Play ``n_games`` teacher games; return (boards, one-hot moves)."""
            boards, targets = [], []
            for idx in range(n_games):
                game = Game(4, 2048)
                while not game.end:
                    oht = self.one_hot(game.board)
                    good = self.tch_search_fun(game.board)

                    if game.score > R:
                        break

                    if game.score > L:
                        boards.append(oht[:, :, :])
                        target = [0.0, 0.0, 0.0, 0.0]
                        target[good] = 1.0
                        targets.append(target)

                    game.move(good)

                if report_progress and idx % 200 == 0:
                    print("Generating training data... ", idx, "/", group)
            return np.array(boards), np.array(targets)

        tag = str(L) + "_" + str(R)

        X_train, y_train = collect(group, True)
        np.save("./dataset/X_train_between" + tag, X_train)
        np.save("./dataset/y_train_between" + tag, y_train)

        X_test, y_test = collect(int(group / 10), False)
        np.save("./dataset/X_test_between" + tag, X_test)
        np.save("./dataset/y_test_between" + tag, y_test)

        self.model.fit(X_train,
                       y_train,
                       epochs=10,
                       batch_size=128,
                       validation_split=0.05)
        self.model.save(filepath=self.model_path)

        score = self.model.evaluate(X_test, y_test, batch_size=128)
        print("Score: ", score)

        # Let the freshly trained model play on its own.
        total = 0
        for _ in range(1000):
            game = Game(4, 2048)
            while not game.end:
                oht = self.one_hot(game.board)
                direction = self.model.predict(oht[np.newaxis, :, :, :])
                game.move(direction.argmax())
            total += game.score

        print("Average Score currently is: ", float(total) / 1000.0)
Exemplo n.º 10
0
    def improve_from_dataset(self, goal=2048, group=10000, go_by_self=True):
        """Repeat generate / fit / evaluate rounds until the model averages > 700.

        In every round ``group`` games produce training samples and
        ``int(group / 10)`` games produce test samples. Each visited position
        is labelled with the move from ``board_to_move``. The game itself
        advances with the model's own prediction when ``go_by_self`` is true,
        otherwise with the teacher's move. After fitting and saving, the model
        plays 1000 games; the loop stops once their mean score exceeds 700.
        """
        from .expectimax import board_to_move
        self.tch_search_fun = board_to_move

        def rollout(n_games, progress_msg):
            """Play ``n_games`` games; return (boards, one-hot teacher moves)."""
            boards, targets = [], []
            for idx in range(n_games):
                game = Game(4, goal)
                while not game.end:
                    oht = self.one_hot(game.board)
                    direction = self.model.predict(oht[np.newaxis, :, :, :])
                    good = self.tch_search_fun(game.board)

                    boards.append(oht[:, :, :])
                    target = [0.0, 0.0, 0.0, 0.0]
                    target[good] = 1.0
                    targets.append(target)

                    game.move(direction.argmax() if go_by_self else good)

                if idx % 100 == 0:
                    print(progress_msg, idx)
            return np.array(boards), np.array(targets)

        cnt = 0
        while True:
            cnt += 1
            X_train, y_train = rollout(group, "Generating training data...")
            X_test, y_test = rollout(int(group / 10),
                                     "Generating testing data...")

            self.model.fit(X_train,
                           y_train,
                           epochs=10,
                           batch_size=128,
                           validation_split=0.05)
            self.model.save(filepath=self.model_path)

            score = self.model.evaluate(X_test, y_test, batch_size=128)
            print("Iteration time:", cnt)
            print("Score: ", score)

            # Measure how the updated model plays on its own.
            total = 0
            for _ in range(1000):
                game = Game(4, goal)
                while not game.end:
                    oht = self.one_hot(game.board)
                    direction = self.model.predict(oht[np.newaxis, :, :, :])
                    game.move(direction.argmax())
                total += game.score

            average = float(total) / 1000.0
            print("Average Score currently is: ", average)
            if average > 700:
                break
Exemplo n.º 11
0
    def multi_level_multi_model_learn(self, itr_time, seq=0):
        """Train three per-level models (128 / 256 / 512) from teacher games.

        Steps:
            1. Load the three models from ``./model_multi/``; if loading
               fails, create new ones with ``self.new_model()``.
            2. Play ``itr_time`` games driven by ``board_to_move`` and bucket
               each visited position by ``game.score`` (<= 128, == 256,
               == 512).
            3. Save the buckets with ``np.save`` (file names suffixed with
               ``seq``), fit each model on its bucket and save the models.
            4. Play 1000 games with the models and print score statistics.

        Args:
            itr_time: Number of teacher games used to generate data.
            seq: Suffix appended to the saved dataset file names.
        """
        from .expectimax import board_to_move
        self.tch_search_fun = board_to_move

        path128 = "./model_multi/multi128.h5"
        path256 = "./model_multi/multi256.h5"
        path512 = "./model_multi/multi512.h5"

        try:
            self.model128 = tf.keras.models.load_model(path128)
            self.model256 = tf.keras.models.load_model(path256)
            self.model512 = tf.keras.models.load_model(path512)
        except Exception:
            # Narrowed from a bare ``except`` so Ctrl-C is not swallowed.
            print("Loar error, new models created")
            self.model128 = self.new_model()
            self.model256 = self.new_model()
            self.model512 = self.new_model()

        X_train_128 = []
        y_train_128 = []
        X_train_256 = []
        y_train_256 = []
        X_train_512 = []
        y_train_512 = []
        for i in range(itr_time):
            if (i % 20 == 0):
                print("Generating Training Data: ", i)

            game = Game(4, 2048)
            # Also stop when the game is over: a game that ends before its
            # score reaches 1024 would otherwise never leave this loop.
            while game.score < 1024 and not game.end:
                oht = self.one_hot(game.board)
                good = self.tch_search_fun(game.board)

                yi = [0.0, 0.0, 0.0, 0.0]
                yi[good] = 1.0

                if game.score <= 128:
                    X_train_128.append(oht[:, :, :])
                    y_train_128.append(yi)
                elif game.score == 256:
                    X_train_256.append(oht[:, :, :])
                    y_train_256.append(yi)
                elif game.score == 512:
                    X_train_512.append(oht[:, :, :])
                    y_train_512.append(yi)

                game.move(good)

        X_train_128 = np.array(X_train_128)
        y_train_128 = np.array(y_train_128)

        X_train_256 = np.array(X_train_256)
        y_train_256 = np.array(y_train_256)

        X_train_512 = np.array(X_train_512)
        y_train_512 = np.array(y_train_512)

        np.save("X_train_multi_model_128_" + str(seq), X_train_128)
        np.save("X_train_multi_model_256_" + str(seq), X_train_256)
        np.save("X_train_multi_model_512_" + str(seq), X_train_512)
        np.save("y_train_multi_model_128_" + str(seq), y_train_128)
        np.save("y_train_multi_model_256_" + str(seq), y_train_256)
        np.save("y_train_multi_model_512_" + str(seq), y_train_512)

        training_sets = (
            (128, self.model128, X_train_128, y_train_128),
            (256, self.model256, X_train_256, y_train_256),
            (512, self.model512, X_train_512, y_train_512),
        )
        for level, level_model, X_level, y_level in training_sets:
            if len(X_level) == 0:
                # A short run may never reach this level; skip the fit.
                print("No training samples for level", level, "- skipping fit")
                continue
            level_model.fit(X_level,
                            y_level,
                            epochs=10,
                            batch_size=128,
                            validation_split=0.05)

        self.model128.save(filepath=path128)
        self.model256.save(filepath=path256)
        self.model512.save(filepath=path512)

        thresholds = [2048, 1024, 512, 256, 128, 64, 32, 16]
        stat = {s: 0 for s in thresholds}
        total = 0
        for i in range(1000):
            game = Game(4, 2048)
            while not game.end:
                oht = self.one_hot(game.board)

                # Always pick a model. Previously any score other than
                # <= 128, 256 or 512 (e.g. 1024) left the prediction as None
                # and crashed on ``.argmax()``. Scores above 512 fall back to
                # the highest-level model available.
                if game.score <= 128:
                    level_model = self.model128
                elif game.score <= 256:
                    level_model = self.model256
                else:
                    level_model = self.model512

                direction = level_model.predict(oht[np.newaxis, :, :, :])
                game.move(direction.argmax())
            total += game.score

            for s in thresholds:
                if game.score >= s:
                    stat[s] += 1

        print("Average Score in 1000 iteration currently is: ",
              float(total) / 1000.0)
        print("stat: ", stat)
Exemplo n.º 12
0
    def multi_level_learn(self, batch_size=128, goal=2048):
        """Train online, raising the ``stable`` level until it equals ``goal``.

        The model plays its own moves while ``board_to_move`` supplies the
        labels. Positions are kept for training only when
        ``game.score >= stable`` (or ``stable <= 64``). A game counts as
        "satisfied" when its final score is at least ``2 * stable``. Every
        1000 games the satisfied proportion is compared with a per-level
        threshold and ``stable`` is doubled when the threshold is exceeded.
        The model is saved every 200 games and once more at the end.
        """
        from .expectimax import board_to_move
        self.tch_search_fun = board_to_move
        stable = 128
        satisfied = 0

        # (applies-to-level test, required satisfied proportion). The rules
        # are evaluated one after another against the *current* ``stable``,
        # exactly like a sequence of independent ``if`` statements.
        promotion_rules = (
            (lambda level: level <= 32, 0.96),
            (lambda level: level == 64, 0.94),
            (lambda level: level == 128, 0.9),
            (lambda level: level == 256, 0.85),
            (lambda level: level == 512, 0.8),
            (lambda level: level == 1024, 0.5),
        )

        games_played = 0
        pending = 0       # samples counted towards the next batch
        steps_seen = 0    # total steps, used only to throttle logging
        while stable != goal:
            games_played += 1

            X_train, y_train = [], []
            game = Game(4, goal)
            while not game.end:
                oht = self.one_hot(game.board)
                direction = self.model.predict(oht[np.newaxis, :, :, :])
                good = self.tch_search_fun(game.board)

                # Only learn from positions at or above the current level.
                if game.score >= stable or stable <= 64:
                    X_train.append(oht[:, :, :])
                    target = [0.0, 0.0, 0.0, 0.0]
                    target[good] = 1.0
                    y_train.append(target)
                    pending += 1

                steps_seen += 1
                if pending == batch_size and X_train and y_train:
                    loss, acc = self.model.train_on_batch(
                        np.array(X_train), np.array(y_train))
                    if steps_seen % 200 == 0:
                        print("Loss\tAcc")
                        print(loss, acc)

                    X_train, y_train = [], []
                    pending = 0

                game.move(direction.argmax())

            if game.score >= stable * 2:
                satisfied += 1

            if games_played % 20 == 0:
                print("Training Number: ", games_played)
                print("Score: ", game.score)
                print("Stable: ", stable)

            if games_played % 200 == 0:
                self.model.save(filepath=self.model_path)

            # Check whether to move on to the next stage.
            if games_played % 1000 == 0:
                for applies, required in promotion_rules:
                    if not applies(stable):
                        continue
                    print("Stable proportion: ", float(satisfied) / 1000)
                    if float(satisfied) / 1000 > required:
                        stable *= 2
                    satisfied = 0

        self.model.save(filepath=self.model_path)
Exemplo n.º 13
0
    def learn(self,
              itr_time=5,
              batch_size=128,
              goal=2048,
              dynamic_batch=False):
        """Train online for ``itr_time`` self-played games.

        The model chooses every move; ``board_to_move`` provides the label for
        each visited position. Collected samples are fed to
        ``train_on_batch`` whenever the running sample counter reaches
        ``batch_size``.

        Args:
            itr_time: Number of games to play.
            batch_size: Samples per training batch (ignored and started at 8
                when ``dynamic_batch`` is true).
            goal: ``score_to_win`` passed to ``Game``.
            dynamic_batch: When true, the batch size is reset to a quarter of
                the best score whenever a new best score is seen.
        """
        from .expectimax import board_to_move
        self.tch_search_fun = board_to_move

        if dynamic_batch:
            batch_size = 8

        thresholds = [2048, 1024, 512, 256, 128, 64, 32, 16]

        max_score = 0
        stable = 8
        satisfied = 0

        stat = {s: 0 for s in thresholds}

        # NOTE(review): ``cnt`` persists across games while the sample lists
        # are reset per game, so a batch may hold fewer than ``batch_size``
        # samples - confirm this is intended.
        cnt = 0
        cnt1 = 0
        for i in range(itr_time):

            X_train = []
            y_train = []
            game = Game(4, goal)
            while not game.end:
                oht = self.one_hot(game.board)
                direction = self.model.predict(oht[np.newaxis, :, :, :])
                good = self.tch_search_fun(game.board)

                X_train.append(oht[:, :, :])
                yi = [0.0, 0.0, 0.0, 0.0]
                yi[good] = 1.0
                y_train.append(yi)

                cnt += 1
                cnt1 += 1
                # ``>=`` rather than ``==``: with a dynamic batch size the
                # counter may already be past the new size, and an equality
                # test would then never fire again.
                if cnt >= batch_size:
                    loss, acc = self.model.train_on_batch(
                        np.array(X_train), np.array(y_train))
                    if cnt1 % 200 == 0:
                        print("Loss\tAcc")
                        print(loss, acc)

                    X_train = []
                    y_train = []
                    cnt = 0

                if cnt1 % 1000 == 0:
                    print("Agent: ", direction)
                    print("Good: ", yi)
                    cnt1 = 0

                game.move(direction.argmax())

            if (i % 20 == 0):
                print("Training Number: ", i)
                print("Score: ", game.score)
                print("Stable: ", stable)

            if i % 500 == 0:
                self.model.save(filepath=self.model_path)

            # Adjust the batch size to the best score seen so far.
            if dynamic_batch and game.score > max_score:
                max_score = game.score
                # Integer division keeps the batch size a whole number (``/``
                # yields a float); never let it drop below one sample.
                batch_size = max(1, int(max_score) // 4)
                print("Higher score occurred, increase batch_size to",
                      batch_size)
                print("Current Max Score is", max_score)

            for s in thresholds:
                if game.score >= s:
                    stat[s] += 1

            if game.score >= stable * 2:
                satisfied += 1

            # Check whether to move on to the next stage (also runs at i == 0).
            if i % 1000 == 0:
                print("stat: ", stat)
                print("stable", stable)
                if float(satisfied) / 1000 > 0.96:
                    stable *= 2
                satisfied = 0
                for s in thresholds:
                    stat[s] = 0

        self.model.save(filepath=self.model_path)
Exemplo n.º 14
0
display1 = Display()
display2 = IPythonDisplay()
model = keras.models.load_model('model.h5')

# Play 10 games with the ExpectiMax agent, recording every board it sees and
# the direction it chose for that board.
image, label = [], []
for i in range(10):
    game = Game(4, score_to_win=2048, random=False)
    agent = ExpectiMaxAgent(game, display=display1)

    while not game.end:
        direction = agent.step()
        image.append(game.board)
        label.append(direction)
        game.move(direction)

    display1.display(game)

# Encode each cell as trunc(log2(value + 1)) and one-hot it over 12 classes.
x_train = keras.utils.to_categorical(
    np.trunc(np.log2(np.array(image) + 1)), 12)

print(x_train.shape)
y_train = keras.utils.to_categorical(np.array(label), NUM_CLASSES)

model.train_on_batch(x_train, y_train)
Exemplo n.º 15
0
#        print (game.board)
#        print ("direction: ", direction)
        if game.board.max() < 256:

            for i in range(4):
                for j in range(4):
                    #f.write(game.board[i,j])
                    print(game.board[i, j], file=f1)
            print(direction, file=f1)

        elif game.board.max() < 512:

            for i in range(4):
                for j in range(4):
                    #f.write(game.board[i,j])
                    print(game.board[i, j], file=f2)
            print(direction, file=f2)

        else:

            for i in range(4):
                for j in range(4):
                    #f.write(game.board[i,j])
                    print(game.board[i, j], file=f3)
            print(direction, file=f3)
        #f.write(direction)

        game.move(direction)

    #f.write('\n')
Exemplo n.º 16
0
display2 = Display()

stop_number = 2048
size = int(np.log2(stop_number)) +1    # number of one-hot slots needed to encode tiles up to stop_number

for i in range(0,500):   # play 500 games, each one stopping once stop_number is reached
    game = Game(4, score_to_win=2048, random=False)
    agent = ExpectiMaxAgent(game, display=display1)  # use the strong agent
    
    while game.end==False:
        # Snapshot of the board taken before the move; used for the stop check below.
        a=np.array(game.board)
        
        direction=agent.step()
        image.append(game.board)
        label.append(direction)
        game.move(direction)
        if np.amax(a)==stop_number:
            break
       
    display1.display(game)
    
image=np.array(image)   # convert the collected boards and labels to numpy arrays
label=np.array(label)


# split into training and test sets
x_train, x_test, y_train, y_test = train_test_split(image, label, test_size = 0.1, random_state= 30)

size = int(np.log2(stop_number)) +1    # number of one-hot slots needed to encode tiles up to stop_number

input_shape = (4, 4, size)
Exemplo n.º 17
0
        super().__init__(game, display)
        self.testgame = Game(4, random=False)
        self.testgame.enable_rewrite_board = True

    def step(self):
        """Return the direction with the highest model output for the board.

        The board cells are cast to int, translated through ``map_table``,
        arranged as a 4x4 array, encoded with ``grid_one`` and passed to the
        module-level ``model`` as a batch of one.
        """
        tiles = self.game.board.astype(int).flatten().tolist()
        encoded = np.array([map_table[t] for t in tiles]).reshape(4, 4)
        batch = np.array([grid_one(encoded)])
        outputs = list(model.predict(batch))
        return np.argmax(outputs[0])


# Benchmark: play ``ntest`` games with MyAgent, counting the total number of
# moves and collecting each game's final score.
steps = 0
scores = []
time_start = time.time()
for i in range(ntest):
    game = Game(4, random=False)
    agent = MyAgent(game, display=None)
    while not game.end:
        game.move(agent.step())
        steps += 1
    scores.append(game.score)
time_end = time.time()
# Report total moves, elapsed wall-clock seconds, the raw scores and their mean.
print("steps", steps)
print('totally cost', time_end - time_start)
print("\n", scores)
print("Average scores: @%d times" % ntest, sum(scores) / len(scores))
Exemplo n.º 18
0
                 inputboard[0, p, q, 0] = 1
             else:
                 inputboard[0, p, q, int(np.log2(num))] = 1
     if maxNum <= 256:
         boards_256.append(inputboard[0])
         directions_256.append(rightDirection)
         myDirection = model_256.predict(inputboard).tolist()[0]
     elif maxNum == 512:
         boards_512.append(inputboard[0])
         directions_512.append(rightDirection)
         myDirection = model_512.predict(inputboard).tolist()[0]
     elif maxNum == 1024:
         boards_1024.append(inputboard[0])
         directions_1024.append(rightDirection)
         myDirection = model_1024.predict(inputboard).tolist()[0]
     game.move(myDirection.index(max(myDirection)))
 print('len(boards_256) = ', len(boards_256))
 # print ('len(boards_512) = ', len(boards_512))
 print('len(boards_512) = ', len(boards_512))
 print('len(boards_1024) = ', len(boards_1024))
 if len(boards_256) >= 200000:
     # convert to numpy array
     boards_256 = np.array(boards_256)
     directions_256 = np.array(directions_256)
     # convert to one-hot encoding
     directions_256 = keras.utils.to_categorical(directions_256,
                                                 num_classes=NUM_CLASSES)
     # train
     print("training on model_256")
     model_256.fit(boards_256,
                   directions_256,
Exemplo n.º 19
0
            if np.sum(game.board) > 384:
                break
            a = np.array(game.board)
            a = np.log2(a + 1)
            a = np.trunc(a)
            a = keras.utils.to_categorical(a, board_class)
            a = a.reshape(1, 4, 4, board_class)
            prediction = model.predict(a, batch_size=128)
            b = prediction[0]
            b = b.tolist()
            direction2 = b.index(max(b))
            direction1 = agent1.step()

            boards.append(game.board)
            directions.append(direction1)
            game.move(direction2)
        display1.display(game)
        if np.amax(game.board) == 1024:
            count += 1

    if count > 98:
        break
    else:
        boards = np.array(boards)
        directions = np.array(directions)

        x_train, x_test, y_train, y_test = train_test_split(boards,
                                                            directions,
                                                            test_size=0.01,
                                                            random_state=30)
        x_train = np.log2(x_train + 1)
Exemplo n.º 20
0
    game = Game(4, score_to_win=2048, random=False)
    agent_exp = ExpectiMaxAgent(game)
    agent = MyAgent(game)
    while (game.score <= 1024) and (not game.end):
        A = game.board
        A[A == 0] = 1
        A = np.log2(A)
        A = np.int32(A)
        A = A.reshape(16)
        dir = agent.step()
        # you can change the condition to get different data
        if game.score >= 512:
            dir_exp = agent_exp.step()
            results.append(A)
            direction.append(dir_exp)
        game.move(dir)
    if 0 == i % 100:
        # save the result every 100 games
        results = np.array(results)
        direction = np.array(direction)
        final_results = np.c_[results, direction]
        final_results = pd.DataFrame(final_results)
        final_results.to_csv("data/data_online_1024.csv",
                             index=False,
                             header=False,
                             mode='a+')

        results = []
        direction = []
    i += 1
Exemplo n.º 21
0
from game2048.agents import MyAgent
from game2048.displays import Display
import csv
import os

game_size = 4
score_to_win = 2048
iter_num = 3000

# Set up one game shared by both agents and play a single move chosen by
# MyAgent; the ExpectiMax suggestion for the same position is kept alongside.
game = Game(game_size, score_to_win)
board = game.board
agenta = ExpectiMaxAgent(game, Display())
agentb = MyAgent(game, Display())
directiona = agenta.step()
directionb = agentb.step()
board = game.move(directionb)

i = 0
dic = {}
idx = 0

# save file
filename = '/home/olivia/PycharmProjects/2048/game2048/data/traindata10.csv'
if os.path.exists(filename):
    start = True
else:
    start = False
    # Create the empty file portably. os.mknod is not available on every
    # platform and can require elevated privileges.
    with open(filename, "a"):
        pass

with open(filename, "a") as csvfile:
    writer = csv.writer(csvfile)
Exemplo n.º 22
0
# Build the network and restore its weights from the saved checkpoint.
model = RCNN_model()

model.load_weights("checkpoints/checkpoint.hdf5")


def reshape_board(board):
    """Encode a 4x4 board as a 4x8 float array.

    Every non-zero cell becomes ``log2(int(cell)) / 11``; zero cells stay 0.
    The result is that 4x4 array stacked horizontally with its transpose.
    """
    scaled = np.zeros((4, 4), dtype=float)
    for row, col in np.ndindex(4, 4):
        tile = int(board[row, col])
        if tile == 0:
            continue
        scaled[row, col] = np.log2(tile) / 11

    return np.hstack((scaled, scaled.T))


game3 = Game(score_to_win=2048, random=False)
display3 = Display()

# Let the model play one game, showing the board before every move.
while game3.end == 0:
    display3.display(game3)
    board = np.array([reshape_board(game3.board)])
    prediction = model.predict(board)
    # The batch holds a single board, so take its one argmax and hand the
    # game a plain int rather than a length-1 array.
    step = int(np.argmax(prediction, axis=1)[0])

    game3.move(step)