Пример #1
0
def main():
    """Run the Minesweeper GUI and let a pre-trained miner play it.

    Parses the command-line hyper-parameters, prints them, builds a ``Miner``
    from them and loads its parameters from ``eval.pth``.  A Tk window titled
    "Minesweeper" is then created (stored in the module-level ``root``) and,
    once per second, the current game state is handed to the miner and the
    action it picks is passed to ``game.lclicked``.
    """
    global root

    # (flag, type, default, help) for every command-line option, in the
    # order they are registered with the parser.
    option_table = (
        ('--epsilon', float, 0.9, 'the probability to choose from memories'),
        ('--memory_capacity', int, 50000, 'the capacity of memories'),
        ('--target_replace_iter', int, 100, 'the iter to update the target net'),
        ('--batch_size', int, 16, 'sample amount'),
        ('--lr', float, 0.001, 'learning rate'),
        ('--n_epochs', int, 20000, 'training epoch number'),
        ('--n_critic', int, 100, 'evaluation point'),
        ('--test', int, 0, 'whether execute test'),
        ('--conv', int, 0, 'choose between linear and convolution'),
    )
    cli = argparse.ArgumentParser()
    for flag, value_type, fallback, description in option_table:
        cli.add_argument(flag, type=value_type, default=fallback, help=description)
    opt = cli.parse_args()
    print(opt)

    # Build the agent from the parsed options and restore its saved parameters.
    miner = Miner(
        opt.epsilon,
        opt.memory_capacity,
        opt.target_replace_iter,
        opt.batch_size,
        opt.lr,
        opt.conv,
    )
    miner.load_params('eval.pth')

    # Tk widget, program title, and the game instance drawn inside it.
    root = Tk()
    root.title("Minesweeper")
    game = GUI(root)

    step_delay_ms = 1000

    def play_one_move():
        # One agent step: observe, choose, click, then re-arm the timer.
        print('pray tell')
        chosen = miner.choose_action(game.get_state())
        game.lclicked(chosen)
        print(chosen)
        root.after(step_delay_ms, play_one_move)

    # Schedule the first move, then hand control to the Tk event loop.
    root.after(step_delay_ms, play_one_move)
    root.mainloop()
Пример #2
0
    # Plot the raw `success` series together with a smoothed copy of it and
    # save the resulting figure to the path given by `name`.
    # NOTE(review): the enclosing function header is not visible here;
    # `ylabel`, `success` and `name` are presumably its parameters.

    # NOTE(review): `axes` is not used by any live code below (its only use is
    # the commented-out set_ylim call).
    axes = plt.gca()
    # axes.set_ylim([0, 500])
    # Clear the current figure before drawing.
    plt.clf()
    plt.xlabel('Test Number')
    plt.ylabel(ylabel)
    # Raw series first; it is also echoed to stdout.
    plt.plot(success)
    print(success)
    # `movingaverage` is defined elsewhere; presumably 10 is the window
    # size -- TODO confirm.
    z = movingaverage(success,10)
    # Drop the last 10 smoothed values...
    z = z[:-10]
    # ...and prepend 10 zeros, which shifts the smoothed curve 10 positions to
    # the right (length is unchanged as long as z had at least 10 entries).
    z = numpy.concatenate((numpy.zeros(10), z))
    plt.plot(z)
    plt.savefig(name)

if opt.test:
    # Evaluation mode: restore the saved parameters and let the miner play a
    # single console game, showing the board after every move.
    miner.load_params('eval.pth')
    game = Minesweeper()
    #to be changed upon GUI
    # Make the opening move at action index 0 before handing over to the miner.
    game.action(0)
    s = game.get_state()
    game.show()
    # Presumably status 0 means the game is still in progress -- TODO confirm.
    while game.get_status() == 0:
        a = miner.choose_action(s)
        game.action(a)
        # Re-read the state after every move; previously `s` was captured once
        # before the loop, so every decision was based on the stale first
        # observation instead of the current board.
        s = game.get_state()
        game.show()
else:
    win_num = 0
    fail_num = 0
    avg_rewards = []
    success = []
    for epoch in range(opt.n_epochs):