# Menu driver for Project 1. The gr and mc modules are the graph demo and
# the Monte-Carlo demo used elsewhere in this project.
import gr
import mc


def main():
    quit = -1
    while quit < 0:
        print("-" * 30 + " Project 1 " + "-" * 30)
        # print("Choose:")
        print("\n\n For a: \n 1: Graph Figure 7.16 in [GT] page 364 \n"
              " Default values (from = 'BWI', to = ['SFO','LAX']) \n\n"
              " For b: \n 2: MC with (3,2) \n 3: MC with (4,3) \n\n"
              " To Quit: \n 4: quit\n\n")
        # input() returns a string in Python 3, so convert before comparing.
        choice = int(input('Enter your choice: '))
        if choice == 4:
            quit = 1
        elif choice == 3:
            mc.main(4, 3)
        elif choice == 2:
            mc.main(3, 2)
        elif choice == 1:
            gr.main()
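# Hedged addition, not in the original excerpt: a standard entry-point guard
# so the menu runs when this file is executed directly.
if __name__ == '__main__':
    main()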
    plt.ylabel('Win Rate')
    plt.title('Summary of ' + a1Name + ' and ' + a2Name + ' over episodes')
    plt.legend(loc="upper left")
    plt.show()


if __name__ == '__main__':
    mcAverageMoves = []
    qlAverageMoves = []
    rAverageMoves = []
    for i in range(50, 1001, 50):
        aveMC = []
        aveQL = []
        aveR = []
        for j in range(20):
            # Play one game with each learner trained for i episodes.
            b1 = MC.main(i)
            mcMoves = totalMoves(b1)
            aveMC.append(mcMoves)
            b2 = QL.main(i)
            qlMoves = totalMoves(b2)
            aveQL.append(qlMoves)
            # Baseline: an agent firing at random on an h x w board
            # (h, w, Agent, randomAgent, and totalMoves are defined above).
            rAgent = Agent(h, w)
            randomAgent(rAgent)
            rMoves = totalMoves(rAgent.enemyBoard)
            aveR.append(rMoves)
        # Arrays of shape (20, 20): 20 episode counts, 20 games per count.
        mcAverageMoves.append(aveMC)
        qlAverageMoves.append(aveQL)
        rAverageMoves.append(aveR)
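    # --- Hedged sketch, not part of the original script ---
    # One plausible way to turn the (20, 20) move counts collected above into
    # a learning curve. Assumes numpy is available and that fewer moves per
    # game means a stronger agent; plt is the matplotlib.pyplot import this
    # file already uses.
    import numpy as np

    episodes = list(range(50, 1001, 50))
    plt.plot(episodes, np.mean(mcAverageMoves, axis=1), label='Monte-Carlo')
    plt.plot(episodes, np.mean(qlAverageMoves, axis=1), label='Q-learning')
    plt.plot(episodes, np.mean(rAverageMoves, axis=1), label='Random')
    plt.xlabel('Training episodes')
    plt.ylabel('Average moves per game (lower is better)')
    plt.legend(loc='upper right')
    plt.show()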
# Run this file to see that our code works.
import QL
import MC

print("Running the Monte-Carlo learning algorithm for 1000 episodes...")
print("Please close the Monte-Carlo graph window to continue")
MC.main(1000)

print("Running the Q-learning algorithm for 1000 episodes...")
QL.main(1000)