class BigDataAgent(ExpectiMaxAgent):
    """ExpectiMax-based agent that logs (board, direction) samples to CSV."""

    def auto_log(self, data_dir="./data/", max_iter=1000, acc=1):
        """Play games and write one CSV row per step.

        Each row holds the flattened board, with every cell translated
        through ``map_table``, followed by the direction returned by
        ``self.step()``.

        Args:
            data_dir: Prefix prepended verbatim to the timestamped file name
                (so it must end with a path separator).
            max_iter: Number of steps (rows) to record.
            acc: Simulated accuracy of the move that is actually played.
                With probability ``1 - (4 * acc - 1) / 3`` the played move is
                replaced by a uniformly random one; the logged label is
                always the direction from ``self.step()``.
        """
        filename = data_dir + datetime.datetime.now().strftime(
            '%y%m%d_%H%M%S_%f') + ".csv"
        print("文件保存到:", filename)
        # Simulate the current accuracy: if the teacher move is kept with
        # probability x, then acc = x + (1 - x) / 4  =>  x = (4 * acc - 1) / 3
        # (e.g. 0.72 = x + (1 - x) / 4  =>  x = 0.63).
        acc_th = (4 * acc - 1) / 3
        # newline="" is what the csv module requires of its file objects;
        # without it, extra blank rows appear on platforms that translate
        # "\n" to "\r\n".
        with open(filename, "w", newline="") as csvfile:
            writer = csv.writer(csvfile)
            n_iter = 0
            while n_iter < max_iter:
                if self.game.end:
                    # Previous game is over: start a fresh one.
                    self.game = Game(4, score_to_win=2048, random=False)
                direction = self.step()
                row = [map_table[int(cell)]
                       for cell in self.game.board.flatten()]
                row.append(direction)
                writer.writerow(row)
                # Occasionally play a random move instead of the logged one.
                if random.random() > acc_th:
                    direction = random.randrange(4)
                self.game.move(direction)
                n_iter += 1
def data_generator_for_CRNN(score_to_begin, score_to_win, batch_size):
    """Yield (boards, labels) batches forever, labelled by ExpectiMaxAgent.

    Games are played with the agent's own moves. A step is recorded only
    while ``game.score >= score_to_begin``. Each recorded board is the result
    of ``board2array(game)`` with axes 1/2 and then 0/1 swapped, reshaped to
    ``(16, 4, 4, 1)``; each label is ``step2array(step)``.

    Args:
        score_to_begin: Minimum ``game.score`` for a step to be recorded.
        score_to_win: Passed to ``Game``.
        batch_size: Number of recorded steps per yielded batch.

    Yields:
        Tuple ``(np.ndarray, np.ndarray)`` of boards and labels.
    """
    batch_boards, batch_labels = [], []
    while True:
        game = Game(score_to_win=score_to_win, random=False)
        teacher = ExpectiMaxAgent(game)
        while game.end == 0:
            move = teacher.step()
            if game.score >= score_to_begin:
                encoded = board2array(game)
                reordered = np.swapaxes(np.swapaxes(encoded, 1, 2), 0, 1)
                batch_boards.append(reordered.reshape((16, 4, 4, 1)))
                batch_labels.append(step2array(move))
            game.move(move)
            # The batch may span several games; emit it once it is full.
            if len(batch_boards) == batch_size:
                yield (np.array(batch_boards), np.array(batch_labels))
                batch_boards, batch_labels = [], []
def step(self):
    """Pick a direction from ``self.Board8`` votes, preferring one that moves.

    ``self.Board8`` returns two 4-element sequences ``(P, Pcount)``. Up to
    four times, the index with the highest ``Pcount`` is selected (ties are
    broken towards the higher ``P``), marked as used by setting its count to
    -1, and tried on a scratch ``Game`` seeded with the current board. The
    first candidate that changes the board wins. If none of the four does,
    ``np.argmax(Pcount)`` is used.

    Returns:
        ``3 - select`` where ``select`` is the chosen index.
        NOTE(review): presumably this converts the model's index order into
        the Game's direction codes - confirm.
    """
    board_before = self.game.board
    cells = [[board_before[r][c] for c in range(4)] for r in range(4)]
    P, Pcount = self.Board8(cells)
    for _ in range(4):
        choice, top_votes = -1, -1
        for idx in range(4):
            votes = Pcount[idx]
            if top_votes < votes or (top_votes == votes
                                     and P[choice] < P[idx]):
                top_votes, choice = votes, idx
        # Exclude this candidate from later rounds.
        Pcount[choice] = -1
        probe = Game(4, enable_rewrite_board=True)
        probe.board = board_before
        probe.move(3 - choice)
        if not (board_before == probe.board).all():
            break
    else:
        # None of the four candidates changed the board.
        choice = np.argmax(Pcount)
    return (3 - choice)
def data_generator_for_CNN(score_to_begin, score_to_win, batch_size):
    """Yield (boards, labels) batches forever, labelled by ExpectiMaxAgent.

    A step is recorded only while ``game.score >= score_to_begin``; the board
    is encoded with ``board2array(game)`` and the label with
    ``step2array(step)``. The game always advances with the agent's move.

    Args:
        score_to_begin: Minimum ``game.score`` for a step to be recorded.
        score_to_win: Passed to ``Game``.
        batch_size: Number of recorded steps per yielded batch.

    Yields:
        Tuple ``(np.ndarray, np.ndarray)`` of boards and labels.
    """
    batch_boards, batch_labels = [], []
    while True:
        game = Game(score_to_win=score_to_win, random=False)
        teacher = ExpectiMaxAgent(game)
        while game.end == 0:
            move = teacher.step()
            if game.score >= score_to_begin:
                batch_boards.append(board2array(game))
                batch_labels.append(step2array(move))
            game.move(move)
            # The batch may span several games; emit it once it is full.
            if len(batch_boards) == batch_size:
                yield (np.array(batch_boards), np.array(batch_labels))
                batch_boards, batch_labels = [], []
def self_test(self):
    """Play 1000 games with ``self.model`` and print score and timing stats.

    For every step the board is one-hot encoded with ``self.one_hot``, fed to
    ``self.model.predict`` and the arg-max output is played. Prints the
    average final score, how many games reached each threshold in
    2048..16, and the mean wall-clock time of one encode+predict step.
    """
    import time

    n_games = 1000
    thresholds = [2048, 1024, 512, 256, 128, 64, 32, 16]
    stat = dict.fromkeys(thresholds, 0)
    total_time = 0.0
    n_steps = 0
    total = 0
    for i in range(n_games):
        if i % 10 == 0:
            print("Test: ", i)
        game = Game(4, 2048)
        while not game.end:
            # time.clock() was removed in Python 3.8; perf_counter() is the
            # documented replacement for measuring short durations.
            start = time.perf_counter()
            oht = self.one_hot(game.board)
            prediction = self.model.predict(oht[np.newaxis, :, :, :])
            move = prediction.argmax()
            total_time += time.perf_counter() - start
            n_steps += 1
            game.move(move)
        total += game.score
        for s in thresholds:
            if game.score >= s:
                stat[s] += 1
        if i % 10 == 0:
            print("Test: ", i)
            print("Score: ", game.score)
    print("Average Score currently is: ", float(total) / float(n_games))
    print("stat: ", stat)
    # Guard against a division by zero if no step was ever timed.
    per_step = total_time / n_steps if n_steps else 0.0
    print("Time for one step (x second/step): ", per_step)
def data_generator(batch_size):
    """Yield (boards, labels) batches forever, labelled by ExpectiMaxAgent.

    Every step of every game is recorded. The sample is ``game.board / 11``
    horizontally stacked with its transpose; the label is
    ``step2array(step)``.

    Args:
        batch_size: Number of recorded steps per yielded batch.

    Yields:
        Tuple ``(np.ndarray, np.ndarray)`` of boards and labels.
    """
    batch_boards, batch_labels = [], []
    while True:
        game = Game(score_to_win=2048, random=False)
        teacher = ExpectiMaxAgent(game)
        while game.end == 0:
            move = teacher.step()
            scaled = game.board / 11
            batch_boards.append(np.hstack((scaled, scaled.T)))
            batch_labels.append(step2array(move))
            game.move(move)
            # The batch may span several games; emit it once it is full.
            if len(batch_boards) == batch_size:
                yield (np.array(batch_boards), np.array(batch_labels))
                batch_boards, batch_labels = [], []
def get_grids_next_step(grid):
    """Return the next 4 states s' reachable from the current state s.

    For each movement code 0..3 a scratch ``Game`` is seeded with a copy of
    ``grid`` and the move is applied.

    Args:
        grid: Board to expand; must provide ``copy()``.

    Returns:
        List of four boards, index i being the board after movement i. If a
        move raises, the board read back from the scratch game is returned
        unchanged for that movement.
    """
    grids_list = []
    for movement in range(4):
        env1 = Game(4, random=False, enable_rewrite_board=True)
        env1.board = grid.copy()
        try:
            env1.move(movement)
        except Exception:
            # Best effort by design: a failing move keeps the seeded board.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            pass
        grids_list.append(env1.board)
    return grids_list
class Env2048(object):
    """Environment-style wrapper (reset/step/state) around a ``Game``."""

    def __init__(self, score_to_win=2048, dim=4, base=2, state=None):
        """Create a fresh game.

        Args:
            score_to_win: Passed to ``Game`` and reused by ``reset``.
            dim: Board size, passed to ``Game`` as ``size``.
            base: Stored as ``base_``; not used by this class.
            state: Accepted for compatibility; currently ignored.
        """
        self.game = Game(size=dim, score_to_win=score_to_win)
        self.dim_ = dim
        self.base_ = base
        self.start_tiles_ = 2
        self.score_to_win = score_to_win

    def __str__(self):
        """Render the board as an ASCII grid with 5-character wide cells."""

        def cell(num):
            # Non-positive cells are drawn as blanks.
            return '%5d' % (num) if num > 0 else ' ' * 5

        demarcation = ('+' + '-' * 5) * self.dim_ + '+\n'
        rows = [
            '|' + '|'.join([cell(num) for num in row]) + '|\n'
            for row in self.game.board
        ]
        return demarcation + demarcation.join(rows) + demarcation

    def __repr__(self):
        # Previously called self.__str__(self), which passed an extra
        # argument and raised TypeError.
        return self.__str__()

    def set_state(self, state):
        """Overwrite the wrapped game's board with ``state``."""
        self.game.board = state

    def get_state(self):
        """Return the wrapped game's board."""
        return self.game.board

    def to_tensor(self):
        """Return ``state2tensor`` applied to the current board."""
        return state2tensor(self.game.board)

    def reset(self):
        """Start a new game with the stored size/target; return its board."""
        self.game = Game(size=self.dim_, score_to_win=self.score_to_win)
        return self.game.board

    def step(self, action):
        """Apply ``action`` and return ``(board, reward, status, info)``.

        Action codes: 0 left; 1 down; 2 right; 3 up.
        Reward is 20 when ``game.end == 2``, 4 when ``game.end == 0`` and
        -8 otherwise. ``status`` is ``game.end`` after the move and ``info``
        is always the empty string.
        """
        self.game.move(action)
        status = self.is_terminate()
        if status == 2:
            reward = 20
        elif status == 0:
            reward = 4
        else:
            reward = -8
        return self.game.board, reward, status, ''

    def get_return(self):
        """Return the wrapped game's score."""
        return self.game.score

    def is_terminate(self):
        """Return the wrapped game's ``end`` flag."""
        return self.game.end
def learn_from_dataset_from_master(self, L, R, group=100000):
    """Train on teacher-labelled boards whose score lies in (L, R].

    Games are played entirely with the teacher (``board_to_move``). A step is
    recorded while ``L < game.score <= R``; a game is abandoned as soon as
    its score exceeds ``R``. ``group`` games feed the training set and
    ``int(group / 10)`` games feed the test set. Both sets are saved under
    ``./dataset/``, the model is fitted, saved and evaluated, and finally
    1000 games are played with the model to print its average score.

    Args:
        L: Exclusive lower score bound for recorded steps.
        R: Inclusive upper score bound for recorded steps.
        group: Number of games used to build the training set.
    """
    from .expectimax import board_to_move
    self.tch_search_fun = board_to_move
    print("Training: [L, R] = ", L, R)
    suffix = str(L) + "_" + str(R)

    def play_teacher_games(n_games, report_progress):
        # Collect (one-hot board, one-hot teacher move) pairs.
        boards, targets = [], []
        for i in range(n_games):
            game = Game(4, 2048)
            while not game.end:
                oht = self.one_hot(game.board)
                good = self.tch_search_fun(game.board)
                if game.score > R:
                    break
                if game.score > L:
                    boards.append(oht[:, :, :])
                    yi = [0.0] * 4
                    yi[good] = 1.0
                    targets.append(yi)
                game.move(good)
            if report_progress and i % 200 == 0:
                print("Generating training data... ", i, "/", group)
        return np.array(boards), np.array(targets)

    X_train, y_train = play_teacher_games(group, True)
    np.save("./dataset/X_train_between" + suffix, X_train)
    np.save("./dataset/y_train_between" + suffix, y_train)

    X_test, y_test = play_teacher_games(int(group / 10), False)
    np.save("./dataset/X_test_between" + suffix, X_test)
    np.save("./dataset/y_test_between" + suffix, y_test)

    self.model.fit(X_train, y_train, epochs=10, batch_size=128,
                   validation_split=0.05)
    self.model.save(filepath=self.model_path)
    score = self.model.evaluate(X_test, y_test, batch_size=128)
    print("Score: ", score)

    # Let the freshly trained model play on its own.
    total = 0
    for _ in range(1000):
        game = Game(4, 2048)
        while not game.end:
            oht = self.one_hot(game.board)
            direction = self.model.predict(oht[np.newaxis, :, :, :])
            game.move(direction.argmax())
        total += game.score
    print("Average Score currently is: ", float(total) / 1000.0)
def improve_from_dataset(self, goal=2048, group=10000, go_by_self=True):
    """Repeat generate/fit/evaluate rounds until the average score tops 700.

    In each round ``group`` games produce training samples and
    ``int(group / 10)`` games produce test samples. Every visited board is
    labelled with the teacher's move (``board_to_move``). The model is then
    fitted, saved, evaluated and played for 1000 games; the loop stops once
    the average score of those games exceeds 700.

    Args:
        goal: ``score_to_win`` passed to ``Game``.
        group: Number of games per round for the training set.
        go_by_self: When true the model's own arg-max move advances the
            game; otherwise the teacher's move does.
    """
    from .expectimax import board_to_move
    self.tch_search_fun = board_to_move

    def gather(n_games, progress_message):
        # Play n_games and return teacher-labelled one-hot boards.
        boards, targets = [], []
        for i in range(n_games):
            game = Game(4, goal)
            while not game.end:
                oht = self.one_hot(game.board)
                direction = self.model.predict(oht[np.newaxis, :, :, :])
                good = self.tch_search_fun(game.board)
                boards.append(oht[:, :, :])
                yi = [0.0] * 4
                yi[good] = 1.0
                targets.append(yi)
                if go_by_self:
                    game.move(direction.argmax())
                else:
                    game.move(good)
            if i % 100 == 0:
                print(progress_message, i)
        return boards, targets

    rounds = 0
    while True:
        rounds += 1
        train_boards, train_targets = gather(
            group, "Generating training data...")
        test_boards, test_targets = gather(
            int(group / 10), "Generating testing data...")
        X_train = np.array(train_boards)
        y_train = np.array(train_targets)
        X_test = np.array(test_boards)
        y_test = np.array(test_targets)

        self.model.fit(X_train, y_train, epochs=10, batch_size=128,
                       validation_split=0.05)
        self.model.save(filepath=self.model_path)
        score = self.model.evaluate(X_test, y_test, batch_size=128)
        print("Iteration time:", rounds)
        print("Score: ", score)

        # Measure the model's own play over 1000 games.
        total = 0
        for _ in range(1000):
            game = Game(4, goal)
            while not game.end:
                oht = self.one_hot(game.board)
                direction = self.model.predict(oht[np.newaxis, :, :, :])
                game.move(direction.argmax())
            total += game.score
        average = float(total) / 1000.0
        print("Average Score currently is: ", average)
        if average > 700:
            break
def multi_level_multi_model_learn(self, itr_time, seq=0):
    """Train three score-level models (128/256/512) on teacher-labelled data.

    ``itr_time`` games are played with the teacher (``board_to_move``). Each
    visited board is stored in the bucket of the level that handles the
    current ``game.score``. The buckets are saved as ``.npy`` files, each
    model is fitted on its bucket and saved, and finally 1000 games are
    played with the level models to print the average score and how many
    games reached each threshold in 2048..16.

    Args:
        itr_time: Number of teacher games used to generate training data.
        seq: Suffix appended to the saved ``.npy`` file names.
    """
    from .expectimax import board_to_move
    self.tch_search_fun = board_to_move
    path128 = "./model_multi/multi128.h5"
    path256 = "./model_multi/multi256.h5"
    path512 = "./model_multi/multi512.h5"
    try:
        self.model128 = tf.keras.models.load_model(path128)
        self.model256 = tf.keras.models.load_model(path256)
        self.model512 = tf.keras.models.load_model(path512)
    except Exception:
        # Any load failure falls back to three fresh models. Catching
        # Exception (not a bare except) keeps Ctrl-C working.
        print("Loar error, new models created")
        self.model128 = self.new_model()
        self.model256 = self.new_model()
        self.model512 = self.new_model()

    levels = (128, 256, 512)
    models = {128: self.model128, 256: self.model256, 512: self.model512}

    def level_for(score):
        # Route every score to exactly one level. The original tests
        # (<= 128, == 256, == 512) left scores above 512 without a model.
        # NOTE(review): assumes game.score is the highest tile (a power of
        # two), in which case the buckets below 1024 match the original.
        if score <= 128:
            return 128
        if score <= 256:
            return 256
        return 512

    boards = {level: [] for level in levels}
    targets = {level: [] for level in levels}
    for i in range(itr_time):
        if i % 20 == 0:
            print("Generating Training Data: ", i)
        game = Game(4, 2048)
        # Also stop when the game is over; otherwise a game that ends
        # before reaching 1024 would keep this loop spinning forever.
        while not game.end and game.score < 1024:
            oht = self.one_hot(game.board)
            good = self.tch_search_fun(game.board)
            yi = [0.0, 0.0, 0.0, 0.0]
            yi[good] = 1.0
            level = level_for(game.score)
            boards[level].append(oht[:, :, :])
            targets[level].append(yi)
            game.move(good)

    X_train = {level: np.array(boards[level]) for level in levels}
    y_train = {level: np.array(targets[level]) for level in levels}
    for level in levels:
        np.save("X_train_multi_model_" + str(level) + "_" + str(seq),
                X_train[level])
    for level in levels:
        np.save("y_train_multi_model_" + str(level) + "_" + str(seq),
                y_train[level])

    for level in levels:
        if len(X_train[level]) == 0:
            # Nothing was collected for this level (e.g. few games played);
            # fitting on an empty array would raise.
            print("No training samples for level", level)
            continue
        models[level].fit(X_train[level], y_train[level],
                          epochs=10, batch_size=128,
                          validation_split=0.05)

    self.model128.save(filepath=path128)
    self.model256.save(filepath=path256)
    self.model512.save(filepath=path512)

    thresholds = [2048, 1024, 512, 256, 128, 64, 32, 16]
    stat = dict.fromkeys(thresholds, 0)
    total = 0
    for i in range(1000):
        game = Game(4, 2048)
        while not game.end:
            oht = self.one_hot(game.board)
            # Every score now maps to a model, so ``direction`` can no
            # longer be None when the score passes 512.
            direction = models[level_for(game.score)].predict(
                oht[np.newaxis, :, :, :])
            game.move(direction.argmax())
        total += game.score
        for s in thresholds:
            if game.score >= s:
                stat[s] += 1
    print("Average Score in 1000 iteration currently is: ",
          float(total) / 1000.0)
    print("stat: ", stat)
def multi_level_learn(self, batch_size=128, goal=2048):
    """Train online, raising the target level ``stable`` stage by stage.

    Games are played with the model's own arg-max moves while every board is
    labelled by the teacher (``board_to_move``). Samples are collected only
    when ``game.score >= stable`` (or ``stable <= 64``) and trained on with
    ``train_on_batch``. Every 1000 games the share of games that reached
    ``stable * 2`` is compared with a per-stage threshold; when it is
    exceeded ``stable`` doubles. The loop ends when ``stable == goal``.

    NOTE(review): indentation was lost in this view, so the nesting of the
    ``cnt``/``cnt1`` increments and of each ``satisfied = 0`` below is a
    reconstruction - confirm against the original file.

    Args:
        batch_size: Value ``cnt`` must equal for a training step to run.
        goal: ``score_to_win`` for ``Game`` and the final value of ``stable``.
    """
    from .expectimax import board_to_move
    self.tch_search_fun = board_to_move
    stable = 128  # current stage; games count as "satisfied" at stable * 2
    satisfied = 0  # games that reached stable * 2 since the last reset
    max_score = 0  # not used below
    # train over and over again
    i = 0  # number of games played
    cnt = 0  # steps since the last training batch
    cnt1 = 0  # total steps, used only to throttle the loss/acc printout
    while stable != goal:
        i += 1
        X_train = []
        y_train = []
        loss = acc = 0
        game = Game(4, goal)
        while not game.end:
            # print(game.board.shape, '\n')
            # print(np.expand_dims(game.board, axis=0).shape)
            oht = self.one_hot(game.board)
            direction = self.model.predict(oht[np.newaxis, :, :, :])
            good = self.tch_search_fun(game.board)
            # only learn useful things
            if game.score >= stable or stable <= 64:
                X_train.append(oht[:, :, :])
                # One-hot target for the teacher's move.
                yi = [0.0, 0.0, 0.0, 0.0]
                yi[good] = 1.0
                y_train.append(yi)
            cnt += 1
            cnt1 += 1
            # NOTE(review): if cnt passes batch_size while X_train is empty,
            # the equality can never hold again - verify this is intended.
            if cnt == batch_size and X_train != [] and y_train != []:
                # print(set_of_lengths(X_train))
                loss, acc = self.model.train_on_batch(
                    np.array(X_train), np.array(y_train))
                if cnt1 % 200 == 0:
                    print("Loss\tAcc")
                    print(loss, acc)
                X_train = []
                y_train = []
                cnt = 0
            # The game advances with the model's move, not the teacher's.
            game.move(direction.argmax())
        if game.score >= stable * 2:
            satisfied += 1
        if (i % 20 == 0):
            print("Training Number: ", i)
            print("Score: ", game.score)
            print("Stable: ", stable)
            # sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
            # print(sess)
        if i % 200 == 0:
            self.model.save(filepath=self.model_path)
        # check if go to next stage
        # The checks run in sequence, so a stage promoted here is examined
        # again by the next ``if`` in the same iteration.
        if i % 1000 == 0 and stable <= 32:
            print("Stable proportion: ", float(satisfied) / 1000)
            if float(satisfied) / 1000 > 0.96:
                stable *= 2
            satisfied = 0
        if i % 1000 == 0 and stable == 64:
            print("Stable proportion: ", float(satisfied) / 1000)
            if float(satisfied) / 1000 > 0.94:
                stable *= 2
            satisfied = 0
        if i % 1000 == 0 and stable == 128:
            print("Stable proportion: ", float(satisfied) / 1000)
            if float(satisfied) / 1000 > 0.9:
                stable *= 2
            satisfied = 0
        if i % 1000 == 0 and stable == 256:
            print("Stable proportion: ", float(satisfied) / 1000)
            if float(satisfied) / 1000 > 0.85:
                stable *= 2
            satisfied = 0
        if i % 1000 == 0 and stable == 512:
            print("Stable proportion: ", float(satisfied) / 1000)
            if float(satisfied) / 1000 > 0.8:
                stable *= 2
            satisfied = 0
        if i % 1000 == 0 and stable == 1024:
            print("Stable proportion: ", float(satisfied) / 1000)
            if float(satisfied) / 1000 > 0.5:
                stable *= 2
            satisfied = 0
    self.model.save(filepath=self.model_path)
def learn(self, itr_time=5, batch_size=128, goal=2048, dynamic_batch=False):
    """Train online for ``itr_time`` games against teacher labels.

    Each game is played with the model's own arg-max moves while every board
    is labelled by the teacher (``board_to_move``). A ``train_on_batch`` step
    runs whenever at least ``batch_size`` steps have been taken since the
    previous one. The model is saved every 500 games and at the end.

    Args:
        itr_time: Number of games to play.
        batch_size: Steps between training updates (ignored when
            ``dynamic_batch`` is true).
        goal: ``score_to_win`` passed to ``Game``.
        dynamic_batch: Start with a batch size of 8 and set it to a quarter
            of the best score whenever a new best score occurs.
    """
    from .expectimax import board_to_move
    self.tch_search_fun = board_to_move
    if dynamic_batch:
        batch_size = 8
    max_score = 0
    stable = 8
    satisfied = 0
    thresholds = [2048, 1024, 512, 256, 128, 64, 32, 16]
    stat = dict.fromkeys(thresholds, 0)
    # train over and over again
    cnt = 0  # steps since the last training update
    cnt1 = 0  # steps since the last "Agent/Good" printout
    for i in range(itr_time):
        X_train = []
        y_train = []
        loss = acc = 0
        game = Game(4, goal)
        while not game.end:
            oht = self.one_hot(game.board)
            direction = self.model.predict(oht[np.newaxis, :, :, :])
            good = self.tch_search_fun(game.board)
            X_train.append(oht[:, :, :])
            # One-hot target for the teacher's move.
            yi = [0.0, 0.0, 0.0, 0.0]
            yi[good] = 1.0
            y_train.append(yi)
            cnt += 1
            cnt1 += 1
            # ">=" rather than "==": with dynamic_batch the batch size can
            # drop below the current count, and an equality test would then
            # never fire again.
            if cnt >= batch_size:
                loss, acc = self.model.train_on_batch(
                    np.array(X_train), np.array(y_train))
                if cnt1 % 200 == 0:
                    print("Loss\tAcc")
                    print(loss, acc)
                X_train = []
                y_train = []
                cnt = 0
            if cnt1 % 1000 == 0:
                print("Agent: ", direction)
                print("Good: ", yi)
                cnt1 = 0
            # The game advances with the model's move, not the teacher's.
            game.move(direction.argmax())
        if i % 20 == 0:
            print("Training Number: ", i)
            print("Score: ", game.score)
            print("Stable: ", stable)
        if i % 500 == 0:
            self.model.save(filepath=self.model_path)
        # increase batch size
        if dynamic_batch and game.score > max_score:
            max_score = game.score
            # Keep the batch size a positive integer; true division used to
            # produce a float (and 0.5 for a score of 2).
            batch_size = max(1, int(max_score) // 4)
            print("Higher score occurred, increase batch_size to",
                  batch_size)
            print("Current Max Score is", max_score)
        for s in thresholds:
            if game.score >= s:
                stat[s] += 1
        if game.score >= stable * 2:
            satisfied += 1
        # check if go to next stage
        # NOTE(review): indentation was lost in this view; resetting
        # ``satisfied`` and ``stat`` at every checkpoint is a reconstruction
        # - confirm against the original file.
        if i % 1000 == 0:
            print("stat: ", stat)
            print("stable", stable)
            if float(satisfied) / 1000 > 0.96:
                stable *= 2
            satisfied = 0
            for s in thresholds:
                stat[s] = 0
    self.model.save(filepath=self.model_path)
# Load a saved model and give it one training step on boards labelled by
# ExpectiMaxAgent.
display1 = Display()
display2 = IPythonDisplay()
model = keras.models.load_model('model.h5')
image, label = [], []

# Run 10 games and record every board together with the chosen direction.
# NOTE(review): indentation was lost in this view; the display call is
# placed once per finished game - confirm against the original file.
for i in range(10):
    game = Game(4, score_to_win=2048, random=False)
    agent = ExpectiMaxAgent(game, display=display1)
    while not game.end:
        direction = agent.step()
        image.append(game.board)
        label.append(direction)
        game.move(direction)
    display1.display(game)

# Encode each cell as trunc(log2(value + 1)), then one-hot over 12 classes.
x_train = np.trunc(np.log2(np.array(image) + 1))
x_train = keras.utils.to_categorical(x_train, 12)
print(x_train.shape)
y_train = keras.utils.to_categorical(np.array(label), NUM_CLASSES)
model.train_on_batch(x_train, y_train)
# Pick the output file from the board's largest value:
# below 256 -> f1, below 512 -> f2, anything else -> f3.
if game.board.max() < 256:
    _sample_file = f1
elif game.board.max() < 512:
    _sample_file = f2
else:
    _sample_file = f3
# One value per line: the 16 cells row by row, then the direction.
for i in range(4):
    for j in range(4):
        print(game.board[i, j], file=_sample_file)
print(direction, file=_sample_file)
game.move(direction)
display2 = Display()
stop_number = 2048
# Number of one-hot channels needed to encode values up to stop_number.
size = int(np.log2(stop_number)) + 1

# Play 500 games with the strong (ExpectiMax) agent; a game is cut short
# once the board sampled before the move contains stop_number.
# NOTE(review): indentation was lost in this view; the display call is
# placed once per finished game - confirm against the original file.
for i in range(500):
    game = Game(4, score_to_win=2048, random=False)
    agent = ExpectiMaxAgent(game, display=display1)
    while not game.end:
        a = np.array(game.board)
        direction = agent.step()
        image.append(game.board)
        label.append(direction)
        game.move(direction)
        if np.amax(a) == stop_number:
            break
    display1.display(game)

# Convert the collected boards and labels to numpy arrays.
image = np.array(image)
label = np.array(label)
# Split into training and test sets.
x_train, x_test, y_train, y_test = train_test_split(
    image, label, test_size=0.1, random_state=30)
# ``size`` was computed above from the same stop_number.
input_shape = (4, 4, size)
super().__init__(game, display) self.testgame = Game(4, random=False) self.testgame.enable_rewrite_board = True def step(self): piece = [ map_table[k] for k in self.game.board.astype(int).flatten().tolist() ] x0 = np.array([grid_one(np.array(piece).reshape(4, 4))]) preds = list(model.predict(x0)) direction = np.argmax(preds[0]) return direction steps = 0 scores = [] time_start = time.time() for i in range(ntest): game = Game(4, random=False) agent = MyAgent(game, display=None) while not game.end: game.move(agent.step()) steps += 1 scores.append(game.score) time_end = time.time() print("steps", steps) print('totally cost', time_end - time_start) print("\n", scores) print("Average scores: @%d times" % ntest, sum(scores) / len(scores))
inputboard[0, p, q, 0] = 1 else: inputboard[0, p, q, int(np.log2(num))] = 1 if maxNum <= 256: boards_256.append(inputboard[0]) directions_256.append(rightDirection) myDirection = model_256.predict(inputboard).tolist()[0] elif maxNum == 512: boards_512.append(inputboard[0]) directions_512.append(rightDirection) myDirection = model_512.predict(inputboard).tolist()[0] elif maxNum == 1024: boards_1024.append(inputboard[0]) directions_1024.append(rightDirection) myDirection = model_1024.predict(inputboard).tolist()[0] game.move(myDirection.index(max(myDirection))) print('len(boards_256) = ', len(boards_256)) # print ('len(boards_512) = ', lesourcn(boards_512)) print('len(boards_512) = ', len(boards_512)) print('len(boards_1024) = ', len(boards_1024)) if len(boards_256) >= 200000: # convert to numpy array boards_256 = np.array(boards_256) directions_256 = np.array(directions_256) # convert to one-hot encoding directions_256 = keras.utils.to_categorical(directions_256, num_classes=NUM_CLASSES) # train print("training on model_256") model_256.fit(boards_256, directions_256,
if np.sum(game.board) > 384: break a = np.array(game.board) a = np.log2(a + 1) a = np.trunc(a) a = keras.utils.to_categorical(a, board_class) a = a.reshape(1, 4, 4, board_class) prediction = model.predict(a, batch_size=128) b = prediction[0] b = b.tolist() direction2 = b.index(max(b)) direction1 = agent1.step() boards.append(game.board) directions.append(direction1) game.move(direction2) display1.display(game) if np.amax(game.board) == 1024: count += 1 if count > 98: break else: boards = np.array(boards) directions = np.array(directions) x_train, x_test, y_train, y_test = train_test_split(boards, directions, test_size=0.01, random_state=30) x_train = np.log2(x_train + 1)
# Plays one game with MyAgent and records ExpectiMaxAgent's direction for
# every board visited while game.score >= 512.
# NOTE(review): this looks like the body of an outer loop; ``i``,
# ``results`` and ``direction`` are defined outside this view, and the
# nesting below is a reconstruction - confirm against the original file.
game = Game(4, score_to_win=2048, random=False)
agent_exp = ExpectiMaxAgent(game)
agent = MyAgent(game)
while (game.score <= 1024) and (not game.end):
    A = game.board
    # Replace empty cells (0) by 1 so that log2 maps them to 0.
    # NOTE(review): if game.board is the live array rather than a copy,
    # this assignment also changes the game state - verify.
    A[A == 0] = 1
    A = np.log2(A)
    A = np.int32(A)
    # Flatten to a 16-element row.
    A = A.reshape(16)
    dir = agent.step()
    # you can change the condition to get different data
    if game.score >= 512:
        dir_exp = agent_exp.step()
        results.append(A)
        direction.append(dir_exp)
    # The game advances with MyAgent's move, not the recorded one.
    game.move(dir)
if 0 == i % 100:  # save the result every 100 games
    results = np.array(results)
    direction = np.array(direction)
    # One row per sample: 16 board values followed by the direction.
    final_results = np.c_[results, direction]
    final_results = pd.DataFrame(final_results)
    # Appended without index or header.
    final_results.to_csv("data/data_online_1024.csv",
                         index=False,
                         header=False,
                         mode='a+')
    # Start a new buffer for the next 100 games.
    results = []
    direction = []
i += 1
from game2048.agents import MyAgent
from game2048.displays import Display
import csv
import os

# Set-up for collecting training data into a CSV file.
game_size = 4
score_to_win = 2048
iter_num = 3000
game = Game(game_size, score_to_win)
board = game.board
# Both agents share the same game instance.
agenta = ExpectiMaxAgent(game, Display())
agentb = MyAgent(game, Display())
directiona = agenta.step()
directionb = agentb.step()
# The first move played is MyAgent's; ``board`` is rebound to the return
# value of game.move.
board = game.move(directionb)
i = 0
dic = {}
idx = 0
# save file
filename = '/home/olivia/PycharmProjects/2048/game2048/data/traindata10.csv'
# ``start`` records whether the file already existed before this run.
if os.path.exists(filename):
    start = True
else:
    start = False
    # NOTE(review): os.mknod is not available on every platform, and the
    # append-mode open below would create the file as well - confirm this
    # call is needed.
    os.mknod(filename)
# Rows are appended to the file.
with open(filename, "a") as csvfile:
    writer = csv.writer(csvfile)
# Load the trained weights and let the model play one displayed game.
model = RCNN_model()
model.load_weights("checkpoints/checkpoint.hdf5")


def reshape_board(board):
    """Return the 4x8 model input for a 4x4 board.

    Every non-zero cell becomes ``log2(value) / 11`` and zero cells stay 0.
    The scaled 4x4 grid is then stacked horizontally with its transpose.
    """
    scaled = np.zeros((4, 4), dtype=float)
    for i, j in np.ndindex(4, 4):
        tile = int(board[i, j])
        if tile != 0:
            scaled[i, j] = np.log2(tile) / 11
    return np.hstack((scaled, scaled.T))


game3 = Game(score_to_win=2048, random=False)
display3 = Display()
while game3.end == 0:
    display3.display(game3)
    # The model receives a batch holding the single encoded board.
    board = np.array([reshape_board(game3.board)])
    prediction = model.predict(board)
    step = np.argmax(prediction, axis=1)
    game3.move(step)