def plot_rewards(self, line_informations, avg_window_size=None):
    """Plot the policy of every run and overlay all reward curves.

    One subplot is created per entry of ``line_informations`` showing each
    row of ``softmax_values.T`` against the iteration index. A final
    subplot, placed on the last row, overlays the reward curve of every
    entry.

    :param line_informations: a list of 3-tuples
        ``(label, rewards, softmax_values)``. ``rewards`` must have a first
        dimension of size 1; it is flattened before being plotted. The
        j-th row of ``softmax_values.T`` is labelled with
        ``self.option_label[j]``.
    :param avg_window_size: when not ``None``, rewards are passed through
        ``movingaverage`` with this window and the first
        ``avg_window_size - 1`` x positions are dropped.
    :raises NotImplementedError: if ``rewards.shape[0] != 1``.
    :return: None; the figure is displayed with ``plt.show()``.
    """
    nb_row = len(line_informations) + 1

    # The last row is shared by the reward curves of every entry.
    reward_ax = plt.subplot(nb_row, 1, nb_row)
    reward_ax.set_title("reward in fct. of iteration")

    for i, (label, rewards, softmax_values) in enumerate(line_informations):
        policy_ax = plt.subplot(nb_row, 1, i + 1)
        policy_ax.set_title("%s - policy in fct. of iteration" % label)

        # Only a single row of rewards is supported.
        if rewards.shape[0] != 1:
            raise NotImplementedError()

        rewards = rewards.flatten()
        xpoints = np.arange(rewards.size)

        for j, option_values in enumerate(softmax_values.T):
            policy_ax.plot(xpoints, option_values.flatten(),
                           label=self.option_label[j])

        # The option labels are the same on every policy subplot, so the
        # legend is only drawn on the first one.
        if i == 0:
            policy_ax.legend(loc='upper right', shadow=True)

        if avg_window_size is not None:
            rewards = movingaverage(rewards, avg_window_size)
            # Drop the leading x positions so that x and y keep the same
            # length (assumes movingaverage shortens its input by
            # avg_window_size - 1 -- TODO confirm).
            xpoints = xpoints[avg_window_size - 1:]

        reward_ax.plot(xpoints, rewards, label=label)
        reward_ax.legend(loc='lower right', shadow=True)

    plt.show()
def plot_comparative(results, labels, moving_avg_window_size=None,
                     plot_iter_average=False, all_lines=False):
    """Plot the reward averaged over several runs, one curve per label.

    ``results`` is indexed as ``results[run][label_index][episode]`` and the
    element at index 2 of every episode is the sequence whose length and sum
    are used as the episode's iteration count and total reward.

    :param results: sequence of runs. ``results[0]`` is used to determine
        the number of episodes per label and, through its length, the set
        of line styles.
    :param labels: legend label of each compared curve; ``labels[i]``
        matches ``run[i]`` of every run.
    :param moving_avg_window_size: when not ``None``, curves are passed
        through ``movingaverage`` with this window and the first
        ``moving_avg_window_size - 1`` x positions are dropped.
    :param plot_iter_average: when true, a second subplot shows the reward
        against the cumulative number of iterations, smoothed with
        ``weighted_moving_average``.
    :param all_lines: when true, the curve of every individual run is also
        drawn, faded, behind the averaged curve.
    :return: None; the figure is displayed with ``plt.show()``.
    """
    ax1 = plt.subplot(2 if plot_iter_average else 1, 1, 1)
    ax1.set_ylabel("Avg. Reward")
    ax1.set_xlabel("# of episodes completed")
    ax1.set_title("Average reward in fct. of # of episodes completed ")

    if plot_iter_average:
        ax2 = plt.subplot(2, 1, 2)
        ax2.set_title("Average reward in fct of # iteration completed")
        ax2.set_ylabel("Reward")
        ax2.set_xlabel("# of iterations completed")

    if len(results[0]) == 3:
        colors = ['b-', 'r-', 'g-']
    else:
        colors = ['b:', 'r:', 'g:', 'b-', 'r-', 'g-']

    for i, lbl in enumerate(labels):
        # Cycle through the styles so that having more labels than styles
        # reuses a style instead of raising IndexError.
        c = colors[i % len(colors)]
        num_episodes = len(results[0][i])
        avg_r_episode = np.zeros((num_episodes, ))
        iter_reward_list = []

        for res in map(itemgetter(i), results):
            # Per episode: number of iterations and total reward.
            lengths, r = zip(*[(len(ep[2]), np.sum(ep[2])) for ep in res])
            assert len(lengths) == num_episodes
            avg_r_episode += r

            xpoints = np.arange(num_episodes)
            if moving_avg_window_size is not None:
                r = movingaverage(r, moving_avg_window_size)
                xpoints = xpoints[moving_avg_window_size - 1:]

            if all_lines:
                ax1.plot(xpoints, r, c, alpha=0.12)

            if plot_iter_average:
                # x position of an episode = iterations completed so far.
                xpoints2 = np.cumsum(lengths).tolist()
                step_size = float(xpoints2[-1] - xpoints2[0]) / num_episodes
                # NOTE(review): r may have been smoothed above while
                # xpoints2 is not trimmed accordingly -- confirm that
                # weighted_moving_average copes with that combination.
                xpoints2, avg_r_iter_taken = weighted_moving_average(
                    xpoints2, r, step_size, width=50)
                if all_lines:
                    ax2.plot(xpoints2, avg_r_iter_taken, c, alpha=0.12)
                iter_reward_list.extend(zip(xpoints2, avg_r_iter_taken))

        # Turn the accumulated per-episode sums into a mean over the runs.
        avg_r_episode = 1.0 / len(results) * avg_r_episode
        xpoints = np.arange(num_episodes)
        if moving_avg_window_size is not None:
            avg_r_episode = movingaverage(avg_r_episode,
                                          moving_avg_window_size)
            xpoints = xpoints[moving_avg_window_size - 1:]
        ax1.plot(xpoints, avg_r_episode, c, label=lbl)

        if plot_iter_average:
            # Merge the points of every run, ordered by iteration count,
            # before smoothing them together.
            iter_reward_list.sort(key=itemgetter(0))
            step_size = float(
                iter_reward_list[-1][0] - iter_reward_list[0][0]
            ) / num_episodes
            x, y = zip(*iter_reward_list)
            xpoints2, avg_r_iter_taken = weighted_moving_average(
                x, y, step_size, width=205)
            ax2.plot(xpoints2, avg_r_iter_taken, c, label=lbl)

    ax1.legend(loc='lower right', shadow=True)
    if plot_iter_average:
        ax2.legend(loc='lower right', shadow=True)
    plt.show()
def test_MA_edge(self):
    """Edge-corrected moving average of a constant series stays constant."""
    expected = np.array([1., 1., 1., 1., 1.])
    actual = utils.movingaverage([1, 1, 1, 1, 1], 3, edgeCorrection=True)
    np.testing.assert_array_equal(actual, expected)
def test_MA_2(self):
    """Window of 4 over eight ones, without edge correction.

    The expected output keeps the input length and is lower than 1 at
    both ends.
    """
    expected = np.array([0.5, 0.75, 1., 1., 1., 1., 1., 0.75])
    actual = utils.movingaverage([1, 1, 1, 1, 1, 1, 1, 1], 4)
    np.testing.assert_array_equal(actual, expected)
def test_MA_1(self):
    """Window of 3 over five ones, without edge correction.

    Both end values are expected to be roughly 2/3; the comparison is
    approximate.
    """
    expected = np.array([0.66666667, 1, 1, 1, 0.66666667])
    actual = utils.movingaverage([1, 1, 1, 1, 1], 3)
    np.testing.assert_array_almost_equal(actual, expected)
def test_MA_edge2(self):
    """Edge-corrected window of 3 over the ramp 1..5.

    Interior values are expected unchanged and the two ends are expected
    to be 1.5 and 4.5; the comparison is approximate.
    """
    expected = np.array([1.5, 2., 3., 4., 4.5])
    actual = utils.movingaverage([1, 2, 3, 4, 5], 3, edgeCorrection=True)
    np.testing.assert_array_almost_equal(actual, expected)