def main():
    """Run the Minesweeper GUI and let a ``Miner`` agent click on it.

    Steps, in order:

    1. Parse the command-line hyper-parameters and print them.
    2. Build a ``Miner`` from the parsed options and call
       ``miner.load_params('eval.pth')``.
    3. Create the Tk root window (stored in the module-level ``root``),
       title it "Minesweeper" and create the ``GUI`` on top of it.
    4. Schedule a callback that asks the agent for an action, left-clicks
       it on the GUI and re-schedules itself, then enter the Tk event loop.

    Side effects:
        Rebinds the module-level name ``root``; prints to stdout; does not
        return until ``root.mainloop()`` returns.
    """
    # Single declaration is enough: the original repeated this statement
    # further down in the same function, which had no additional effect.
    global root

    parser = argparse.ArgumentParser()
    parser.add_argument('--epsilon', type=float, default=0.9,
                        help='the probability to choose from memories')
    parser.add_argument('--memory_capacity', type=int, default=50000,
                        help='the capacity of memories')
    parser.add_argument('--target_replace_iter', type=int, default=100,
                        help='the iter to update the target net')
    parser.add_argument('--batch_size', type=int, default=16,
                        help='sample amount')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate')
    # NOTE: --n_epochs, --n_critic and --test are accepted so the command
    # line stays compatible, but nothing in this function reads them.
    parser.add_argument('--n_epochs', type=int, default=20000,
                        help='training epoch number')
    parser.add_argument('--n_critic', type=int, default=100,
                        help='evaluation point')
    parser.add_argument('--test', type=int, default=0,
                        help='whether execute test')
    parser.add_argument('--conv', type=int, default=0,
                        help='choose between linear and convolution')
    opt = parser.parse_args()
    print(opt)

    miner = Miner(opt.epsilon, opt.memory_capacity, opt.target_replace_iter,
                  opt.batch_size, opt.lr, opt.conv)
    miner.load_params('eval.pth')

    # create Tk widget
    root = Tk()
    # set program title
    root.title("Minesweeper")
    # create game instance
    game = GUI(root)

    # Delay between two agent moves, in milliseconds (as taken by Tk's
    # ``after``). Named once instead of repeating the literal 1000.
    step_delay_ms = 1000

    def sub_func():
        """Play one agent move on the GUI, then queue the next one."""
        print('pray tell')
        s = game.get_state()
        a = miner.choose_action(s)
        game.lclicked(a)
        print(a)
        # Re-arm the timer so the agent keeps playing while the loop runs.
        root.after(step_delay_ms, sub_func)

    # Queue the first move, then run the event loop.
    root.after(step_delay_ms, sub_func)
    root.mainloop()
axes = plt.gca() # axes.set_ylim([0, 500]) plt.clf() plt.xlabel('Test Number') plt.ylabel(ylabel) plt.plot(success) print(success) z = movingaverage(success,10) #chop off the remaining 10 z = z[:-10] z = numpy.concatenate((numpy.zeros(10), z)) plt.plot(z) plt.savefig(name) if opt.test: miner.load_params('eval.pth') game = Minesweeper() #to be changed upon GUI game.action(0) s = game.get_state() game.show() while game.get_status() == 0: a = miner.choose_action(s) game.action(a) game.show() else: win_num = 0 fail_num = 0 avg_rewards = [] success = [] for epoch in range(opt.n_epochs):