def problem6():
    """
    Repeat the previous question, but using the GA (as described earlier in
    this homework) on the cart-pole domain. Report the same quantities and how
    the policy was parameterized.

    Runs ``numTrials`` trials of ``numGenerations`` GA generations each.
    ``GAInit`` is constructed with ``numActions * (k + 1) ** m`` (2 * 4**4 =
    512 with the values below), presumably the number of policy parameters
    per population member. After each trial the evaluator's ``endTrial()`` is
    called, the agent is reset, and the learning curve is plotted.
    """
    # NOTE(review): this file contains a second, later `def problem6()`; if
    # both live in the same module, the later definition replaces this one.
    print("Problem 6")

    # Environment Params
    m = 4
    numActions = 2

    # Policy Search Params
    numTrials = 50
    numGenerations = 20
    populationSize = 20
    numEpisodes = 10
    numElite = 10
    numTruncate = 5
    alpha = 0.1
    k = 3  # passed to the evaluator and used to size the parameter vector

    policyEval = CartPoleEvaluation(k=k)
    initGA = GAInit(numActions * np.power(k + 1, m))

    agent = GA(populationSize, policyEval, initGA,
               numElite=numElite, numTruncate=numTruncate,
               alpha=alpha, numEpisodes=numEpisodes)

    for trial in range(numTrials):
        print("Trial ", trial)
        for gen in range(numGenerations):
            print("Generation ", gen)
            agent.train()
        policyEval.endTrial()
        agent.reset()
        # NOTE(review): the original indentation of this call was lost. It is
        # placed inside the trial loop because the filename is keyed on
        # `trial`; move it after the loop if only one final plot is wanted.
        policyEval.plot('learningCurve_cartpole_GA_{}.png'.format(trial),
                        "Learning Curve - Cartpole with GA Agent")
def problem3():
    """
    Repeat the previous question, but using the GA (as described earlier in
    this assignment) on the More-Watery 687-Gridworld domain. Report the same
    quantities.

    Runs ``numTrials`` trials of ``numGenerations`` GA generations each.
    ``GAInit`` is constructed with ``num_states * num_actions`` (25 * 4 = 100),
    presumably one parameter per state-action pair. After each trial the
    evaluator's ``endTrial()`` is called, the agent is reset, and the learning
    curve is plotted.
    """
    # NOTE(review): this file contains a second, later `def problem3()`; if
    # both live in the same module, the later definition replaces this one.
    print("Problem 3")

    # Environment Params
    num_states = 25
    num_actions = 4

    # Policy Search Params
    numTrials = 50
    numGenerations = 100
    populationSize = 30
    numEpisodes = 20
    numElite = 20
    numTruncate = 5
    alpha = 1.25

    policyEval = GridworldEvaluation()
    initGA = GAInit(num_states * num_actions)

    agent = GA(populationSize, policyEval, initGA,
               numElite=numElite, numTruncate=numTruncate,
               alpha=alpha, numEpisodes=numEpisodes)

    for trial in range(numTrials):
        print("Trial ", trial)
        for gen in range(numGenerations):
            print("Generation ", gen)
            agent.train()
        policyEval.endTrial()
        agent.reset()
        # NOTE(review): the original indentation of this call was lost. It is
        # placed inside the trial loop because the filename is keyed on
        # `trial`; move it after the loop if only one final plot is wanted.
        policyEval.plot('learningCurve_gridworld_GA_{}.png'.format(trial),
                        "Learning Curve - Gridworld with GA Agent")
def problem6():
    """
    Repeat the previous question, but using the GA (as described earlier in
    this homework) on the cart-pole domain. Report the same quantities and how
    the policy was parameterized.

    Runs ``numTrials`` trials of ``numIterations`` GA iterations each and
    records ``evaluate.batchReturn`` after every iteration into a
    ``(numTrials, numIterations * numEpisodes * popSize)`` array. The
    per-column mean and standard deviation across trials are drawn as an
    error-bar plot with a horizontal line at the maximum mean.

    Outputs (``<name>`` encodes the run parameters and the current HH_MM):
        images/<name>.png
        data/results_<name>.npy
        data/average_results_<name>.npy
        data/std_results_<name>.npy
    """
    import os  # local import: only needed to create the output directories

    print("ga-cartpole-softmax_theta_phi")

    # Assigned before the closure below so the dependency is explicit; the
    # original relied on late binding (assignment happened after the def).
    fourier_param = 4

    popSize = 10
    numElite = 3
    numEpisodes = 5
    numTrials = 10
    numIterations = 100

    # Create the output directories up front so a long run cannot finish and
    # then fail on savefig / np.save because a directory is missing.
    os.makedirs("images", exist_ok=True)
    os.makedirs("data", exist_ok=True)

    def initPopFn(pop_size):
        """Return an all-zero population with one parameter row per member."""
        return np.zeros((pop_size, 2 * fourier_param**4))

    # NOTE(review): a single environment step whose result is discarded;
    # looks like a leftover smoke test. Kept because Cartpole's side effects
    # are not visible from here -- confirm and remove.
    state = np.array([0, 0, 0, 0])
    env = Cartpole()
    env.nextState(state, 0)

    evaluate = EvaluateCartpole()
    ga = GA(popSize, evaluate, initPopFn, numElite, numEpisodes)

    # One GA iteration fills one contiguous batch of columns.
    batch_size = numEpisodes * popSize
    total_episodes = numIterations * batch_size
    results = np.zeros((numTrials, total_episodes))

    for trial in range(numTrials):
        ga.reset()
        for i in range(numIterations):
            if i % 5 == 0:
                print("cart ga: ", "trial: ", trial, "/", numTrials,
                      " iteration: ", i, "/", numIterations)
            ga.train()
            batch_start = i * batch_size
            batch_end = batch_start + batch_size
            # Assumes evaluate.batchReturn holds exactly batch_size returns
            # after each train() call; NumPy raises ValueError otherwise.
            results[trial, batch_start:batch_end] = np.array(
                evaluate.batchReturn)

    average_results = np.mean(results, axis=0)
    std_results = np.std(results, axis=0)
    maximumEpisodes = average_results.shape[0]
    max_avg = np.max(average_results)

    plt.errorbar(np.arange(maximumEpisodes), average_results, std_results,
                 marker='.', ecolor='aqua')
    plt.grid(True)
    plt.axhline(max_avg)
    plt.text(0, max_avg, "max: " + str(round(max_avg, 2)),
             fontsize=15, backgroundcolor='w')

    plt_name = "ga_cartpole"
    plt_name += "_numTrials_{}_numIter_{}_popSize_{}".format(
        numTrials, numIterations, popSize)
    plt_name += datetime.now().strftime("_t_%H_%M")
    print("plt_name=", plt_name)

    plt.savefig("images/" + plt_name + ".png", dpi=200)

    # Persist the raw data before plt.show(), which may block until the
    # figure window is closed.
    np.save("data/" + "results_" + plt_name, results)
    np.save("data/" + "average_results_" + plt_name, average_results)
    np.save("data/" + "std_results_" + plt_name, std_results)

    plt.show()
def problem3():
    """
    Repeat the previous question, but using the GA (as described earlier in
    this assignment) on the More-Watery 687-Gridworld domain. Report the same
    quantities.

    Runs ``numTrials`` trials of ``numIterations`` GA iterations each and
    records ``evaluate.batchReturn`` after every iteration into a
    ``(numTrials, numIterations * numEpisodes * popSize)`` array. The
    per-column mean and standard deviation across trials are drawn as an
    error-bar plot with a horizontal line at the maximum mean.

    Outputs (``<name>`` encodes the run parameters and the current HH_MM):
        images/<name>.png
        data/results_<name>.npy
        data/average_results_<name>.npy
        data/std_results_<name>.npy
    """
    import os  # local import: only needed to create the output directories

    # Length of each member's parameter vector.
    # NOTE(review): presumably 25 states x 4 actions -- confirm.
    num_params = 100

    def initPopFn(pop_size):
        """Draw ``pop_size`` parameter vectors from a standard normal."""
        theta_zeros = np.zeros(num_params)
        # Zero mean, identity covariance: independent N(0, 1) entries.
        return np.random.multivariate_normal(
            theta_zeros, np.identity(len(theta_zeros)), size=pop_size)

    print("ga-gridworld-tabular_softmax")

    popSize = 10
    numElite = 4
    numEpisodes = 10
    numTrials = 20
    numIterations = 25

    # Create the output directories up front so a long run cannot finish and
    # then fail on savefig / np.save because a directory is missing.
    os.makedirs("images", exist_ok=True)
    os.makedirs("data", exist_ok=True)

    evaluate = Evaluate()
    ga = GA(popSize, evaluate, initPopFn, numElite, numEpisodes)

    # One GA iteration fills one contiguous batch of columns.
    batch_size = numEpisodes * popSize
    total_episodes = numIterations * batch_size
    results = np.zeros((numTrials, total_episodes))

    for trial in range(numTrials):
        ga.reset()
        for i in range(numIterations):
            if i % 5 == 0:
                print("ga: ", "trial: ", trial, "/", numTrials,
                      " iteration: ", i, "/", numIterations)
            ga.train()
            batch_start = i * batch_size
            batch_end = batch_start + batch_size
            # Assumes evaluate.batchReturn holds exactly batch_size returns
            # after each train() call; NumPy raises ValueError otherwise.
            results[trial, batch_start:batch_end] = np.array(
                evaluate.batchReturn)

    average_results = np.mean(results, axis=0)
    std_results = np.std(results, axis=0)
    maximumEpisodes = average_results.shape[0]
    max_avg = np.max(average_results)

    plt.errorbar(np.arange(maximumEpisodes), average_results, std_results,
                 marker='.', ecolor='aqua')
    plt.grid(True)
    plt.axhline(max_avg)
    plt.text(0, max_avg, "max: " + str(round(max_avg, 2)),
             fontsize=15, backgroundcolor='w')

    plt_name = "ga_gridworld"
    plt_name += "_numTrials_{}_numIter_{}".format(numTrials, numIterations)
    plt_name += datetime.now().strftime("_%H_%M")
    print("plt_name=", plt_name)

    plt.savefig("images/" + plt_name + ".png", dpi=200)

    # Persist the raw data before plt.show(), which may block until the
    # figure window is closed.
    np.save("data/" + "results_" + plt_name, results)
    np.save("data/" + "average_results_" + plt_name, average_results)
    np.save("data/" + "std_results_" + plt_name, std_results)

    plt.show()