def run_sarsa(start, goal, Xrange, Vrange, plot_data_pid):
    """Train one Sarsa agent per ``n`` in 1..8 and plot all results together.

    The plotted collection starts with ``plot_data_pid`` and is followed by
    the ``episodes`` attribute of each trained agent, in increasing ``n``.

    Args:
        start, goal, Xrange, Vrange: Forwarded unchanged to the ``Sarsa``
            constructor.
        plot_data_pid: Data placed first in the list handed to
            ``plot_with_n`` (presumably a baseline curve from a PID
            controller -- confirm against the caller).
    """

    def _episodes_for(step_count):
        # NOTE(review): ``n`` looks like the n-step look-ahead of Sarsa --
        # confirm against the Sarsa class.
        agent = Sarsa(start, goal, Xrange, Vrange, n=step_count)
        agent.train(epoch=EPOCH, max_episode_length=MAX_EPISODE_LENGTH)
        return agent.episodes

    curves = [plot_data_pid]
    # The generator is consumed in order, so agents are still built and
    # trained one after another for n = 1, 2, ..., 8.
    curves.extend(_episodes_for(step_count) for step_count in range(1, 9))
    plot_with_n(curves)
# Hyper-parameter sweep for Sarsa: for every (e, order, alpha) combination,
# run `trails` independent training runs, then plot the per-episode mean
# reward with standard-deviation error bars and pickle the mean curve.

# NOTE(review): `type` shadows the builtin; it is kept because code outside
# this fragment may read it. In the visible code it is only referenced by
# the commented-out figure path below.
type = "linear"

# best parameter, order 3, e 0.2, alpha 0.5
# best parameter, order 5, e 0.2, alpha 0.5
for e in [0.3]:  # other candidates: 0.1, 0.01, 0.3, 0.4
    for order in [3]:  # other candidate: 5
        # other candidates: 0.0001, 0.0005, 0.0009, 0.001, 0.005, 0.009,
        # 0.01, 0.05, 0.09, 0.1, 0.5, 0.9
        for alpha in [0.01]:
            rewards = []
            print("Alpha: ", alpha)
            for t in tqdm(range(trails)):
                # print("Alpha: %s, Trail: %s" %(alpha, t))
                td = Sarsa(gamma, alpha, env, state_space, steps, e,
                           plot=plot, order=order, discount=discount)
                td.train(episodes)
                rewards.append(td.reward)

            # Stack the runs once; axis 0 indexes the run, so the statistics
            # below are taken across runs.
            # Assumes every td.reward has the same length -- TODO confirm.
            reward_matrix = np.array(rewards)
            avg = np.average(reward_matrix, axis=0)
            std = np.std(reward_matrix, axis=0)
            maximumEpisodes = avg.shape[0]
            plt.errorbar(np.arange(maximumEpisodes), avg, std,
                         marker='^', ecolor='g')

            # name = "Sarsa/figures/%s/cartPole_type_%s_order%s_alpha%s_e%s.jpg" % (type, type, order, alpha, e)
            # NOTE(review): the file receives pickled data despite the .jpg
            # suffix; the name is left unchanged so existing readers of this
            # file keep working.
            name = "Grid_alpha%s_e%s.jpg" % (alpha, e)
            # Use a context manager so the handle is flushed and closed
            # deterministically instead of leaking until garbage collection.
            with open(name, "wb") as out_file:
                pickle.dump(avg, out_file)

            plt.xlabel("Number of episodes")
            plt.ylabel("Total Reward")
            # Saving and closing the figure is currently disabled:
            # plt.savefig(name)
            # plt.close()