def boxplot_multiple_configurations_rewards_timesteps_last_episodes(
        algor, param, values_of_param, last_20_rewards, last_20_timesteps):
    """
    Generate boxplots for the value of reward over the last 20 episodes
    """

    # No longer averaging here: every point of the average reward is kept.
    # Last 20 episodes' rewards of 5 runs -> 100 points per box
    # [    run 1     run 2   run 3   run 4    run 5
    #     [ep 90]    ...     ...
    #     [ep 91]
    #     ...
    #     [ep 100]
    # ]
    fig, ax = plt.subplots()
    col = ax.boxplot(last_20_rewards)
    ax.set_xticklabels(values_of_param)
    ax.set_ylabel('Avg reward')
    ax.set_title('Avg reward in 5 runs of last 20 episodes per config of ' +
                 param + ' for algo ' + algor)
    fig.tight_layout()
    plt.savefig('boxplot_param_reward_last_20' + get_extension())

    fig, ax = plt.subplots()
    col = ax.boxplot(last_20_timesteps)
    ax.set_xticklabels(values_of_param)
    ax.set_ylabel('Avg time steps')
    ax.set_title(
        'Avg time steps in 5 runs of last 20 episodes per config of ' + param +
        ' for algo ' + algor)
    fig.tight_layout()
    plt.savefig('boxplot_param_timestep_last_20' + get_extension())
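
# A minimal usage sketch for the helper above. The shapes follow the comment
# block (5 runs x last 20 episodes = 100 pooled points per box); the data and
# the parameter name "epsilon" are illustrative, not taken from the repo.
def _demo_boxplot_last_episodes():
    import numpy as np
    rng = np.random.default_rng(0)
    values_of_param = ["0.1", "0.3", "0.9"]
    last_20_rewards = [rng.normal(10 * v, 2.0, 100) for v in (0.1, 0.3, 0.9)]
    last_20_timesteps = [rng.integers(10, 60, 100) for _ in values_of_param]
    boxplot_multiple_configurations_rewards_timesteps_last_episodes(
        "sarsa", "epsilon", values_of_param, last_20_rewards,
        last_20_timesteps)
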
def plot_multiple_algos_avg_rewards_timesteps_bars(algos, avg_rew, avg_steps,
                                                   path):
    """
    Plot averaged bar graphs for a single path
    """
    target_output_dir = build_output_dir_from_path(output_dir, path)

    fig, ax = plt.subplots()
    cols_labels = []
    for al in algos:
        cols_labels.append(print_cute_algo_name(al))
    col = ax.bar(cols_labels, avg_rew, align='center')

    ax.set_ylabel('Avg reward for episode')
    # ax.set_title('Avg reward for algos')
    plt.axhline(0, color='black', lw=.3)
    fig.tight_layout()
    plt.savefig(target_output_dir + 'avg_rewards_for_algos' + get_extension())
    plt.show()

    fig, ax = plt.subplots()
    col = ax.bar(
        cols_labels,
        avg_steps,
        align='center',
    )
    ax.set_ylabel('Avg time steps for episode')
    fig.tight_layout()
    plt.savefig(target_output_dir + 'avg_steps_for_algos' + get_extension())
    plt.show()
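
# Sketch of a call to the bar-plot helper above: one averaged value per
# algorithm. The numbers are synthetic and the path id is an assumption.
def _demo_algo_bars():
    algos = ["sarsa", "sarsa_lambda", "qlearning", "qlearning_lambda"]
    avg_rew = [12.4, 15.1, 13.8, 16.0]
    avg_steps = [42.0, 37.5, 40.2, 35.9]
    plot_multiple_algos_avg_rewards_timesteps_bars(algos, avg_rew, avg_steps,
                                                   path=1)
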
def plot_training_time_traffic(path=None):
    """
    Generate boxplots for training time and traffic values saved into csv files
    """
    times = [[], [], [], []]
    traffic = [[], [], [], []]

    starter = "0"
    if path is not None:
        starter = "path" + str(path)

    # target_output_dir falls back to the current directory when path is not given (used for data collected before tuning)
    target_output_dir = build_output_dir_from_path(output_dir, path)

    algos = ["sarsa", "sarsa_lambda", "qlearning", "qlearning_lambda"]

    # Check that input_dir exists
    if not pathlib.Path(input_dir).exists():
        print("Input directory does not exist.")
        exit(1)

    # read csv files generated by get_training_time_traffic.py
    for index, fa in enumerate(algos):
        with open(input_dir + starter + "_" + fa + ".csv", 'r') as csv_file:
            reader = csv.reader(csv_file, delimiter=',')
            next(reader, None)
            for row in reader:
                times[index].append(float(row[1]))
                traffic[index].append(int(row[2]))
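
    # Assumed layout of each csv file (header skipped above, column 1 = time,
    # column 2 = traffic); header names and values here are illustrative:
    #
    #   run,training_time_s,commands_sent
    #   2020-11-05_10-00,412.7,1532
    #   2020-11-05_11-00,398.2,1488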

    fig, ax = plt.subplots()

    col = ax.boxplot(times)
    ax.set_xticklabels([print_cute_algo_name(a) for a in algos])
    ax.set_ylabel('Time (s)')
    plt.xticks(rotation=45)
    # ax.set_title('Training time per algorithm')
    plt.grid(True, color='gray', linestyle='dashed')
    fig.tight_layout()
    plt.savefig(target_output_dir + 'training_times' + get_extension())
    plt.show()

    fig, ax = plt.subplots()
    col = ax.boxplot(traffic)
    ax.set_xticklabels(["SARSA", "SARSA(λ)", "Q-learning", "Q(λ)"])
    plt.xticks(rotation=45)
    ax.set_ylabel('Number of commands sent')
    # ax.set_title('Traffic generated per algorithm')
    plt.grid(True, color='gray', linestyle='dashed')
    fig.tight_layout()
    plt.savefig(target_output_dir + 'training_traffic' + get_extension())
    plt.show()

def plot_multiple_configuration_avg_rewards_timesteps_bars(
        algo, param, param_values, avg_rew, avg_steps):
    """
    Generate bar plots with the global average reward and timesteps values
    for multiple executions and configurations of a single parameter
    """

    complete_target_dir = build_output_dir_for_params(target_dir, param, algo)

    fig, ax = plt.subplots()
    param_labels = []
    for index, i in enumerate(param_values):
        # lambda can be 0, so keep the leading zero for the first value
        if index == 0 and param == "lambda":
            param_labels.append(return_greek_letter(param) + "=" + i)
        else:
            param_labels.append(
                return_greek_letter(param) + "=" + i.lstrip('0'))
    col = ax.bar(param_labels, avg_rew, align='center')

    ax.set_ylabel('Avg reward for episode')
    # ax.set_title('Avg reward for different configurations of ' + param)
    plt.axhline(0, color='black', lw=.3)
    fig.tight_layout()
    plt.savefig(complete_target_dir + 'avg_rewards_for_' + param +
                get_extension())
    plt.show()

    fig, ax = plt.subplots()
    col = ax.bar(
        param_labels,
        avg_steps,
        align='center',
    )

    ax.set_ylabel('Avg time steps for episode')
    # ax.set_title('Avg steps for different configurations of ' + param)
    fig.tight_layout()
    plt.savefig(complete_target_dir + 'avg_steps_for_' + param +
                get_extension())
    plt.show()
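
# Illustrative invocation of the helper above, showing the zero-stripping in
# the labels ("0.5" -> ".5") and the preserved "0" for the first lambda
# value; all numbers are synthetic.
def _demo_param_bars():
    plot_multiple_configuration_avg_rewards_timesteps_bars(
        "qlearning_lambda", "lambda", ["0", "0.25", "0.5"],
        avg_rew=[10.2, 11.5, 12.1], avg_steps=[44.0, 41.3, 39.8])
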
def plot_cdf_reward_multiple_algo(algorithms_target, episodes_target, avg_rew,
                                  path):
    """
    Generate plot of the CDF of the average reward for episodes
    """
    target_output_dir = build_output_dir_from_path(output_dir, path)

    fig, ax = plt.subplots()

    for i in range(0, len(algorithms_target)):
        # plt.plot(episodes_target[i], avg_rew[i], label=algorithms_target[i],)
        # First sorting the array
        plt.hist(np.sort(avg_rew[i]),
                 density=True,
                 cumulative=True,
                 label=print_cute_algo_name(algorithms_target[i]),
                 bins=2000,
                 histtype='step',
                 alpha=0.8)
        fix_hist_step_vertical_line_at_end(ax)

    plt.xlabel('Reward')
    plt.ylabel('CDF (Episode)')
    plt.legend(loc='lower right', prop=fontP, ncol=n_cols)
    # plt.title('CDF of avg reward per sent command')
    plt.grid(True, color='gray', linestyle='dashed')
    plt.tight_layout()
    plt.ylim(0, 1.0)
    plt.savefig(target_output_dir + 'cdf_rewards_multiple_algo' +
                get_extension())
    plt.show()
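
# The step histogram above (density=True, cumulative=True, many bins)
# approximates the empirical CDF. A sketch of the equivalent explicit
# construction, for comparison:
def _empirical_cdf(values):
    import numpy as np
    xs = np.sort(np.asarray(values, dtype=float))
    ys = np.arange(1, xs.size + 1) / xs.size  # P(X <= x) at each sample point
    return xs, ys
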
def retrieve_reward_per_request_single_run(date_to_retrieve,
                                           show_intermediate_graphs=False,
                                           color_index=0,
                                           algorithm="sarsa"):
    """
    Retrieve the reward per each time step (or command sent by the RL algorithm) from log file
    for 1 single execution
    """

    episodes, commands, reward, cum_rewards = read_all_info_from_log(
        date_to_retrieve)

    if show_intermediate_graphs:
        colors = ["#EB1E35", "#E37600", "#054AA6", "#038C02"]

        pl.plot(commands,
                cum_rewards,
                label=algorithm,
                color=colors[color_index])

        pl.xlabel("Number of sent commands $\mathregular{n_a}$")
        pl.ylabel("Cumulative reward $\mathregular{C(n_a)}$")
        pl.legend(loc='upper right')
        # pl.title('Cumulative reward over commands for ' + algorithm)
        pl.grid(True)
        plt.savefig('commands_plot_' + algorithm + get_extension())
        pl.tight_layout()
        plt.show()

    return commands, cum_rewards, len(commands)
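
# Hypothetical call; the date string format is an assumption and must match
# the log files parsed by read_all_info_from_log:
#
#   commands, cum_rewards, n = retrieve_reward_per_request_single_run(
#       "2020_11_05_10_00_00", show_intermediate_graphs=True,
#       color_index=0, algorithm="sarsa")
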
def plot_multiple_algo_moving_avg(algorithms_target,
                                  episodes_target,
                                  moving_average_rewards_target,
                                  moving_average_timesteps_target,
                                  path=None):
    """
    Generate plots having the moving average reward and timesteps values for all RL algorithms
    """
    target_output_dir = build_output_dir_from_path(output_dir, path)

    for i in range(0, len(algorithms_target)):
        # align x with the (shorter) moving-average series
        offset = (len(episodes_target[i]) -
                  len(moving_average_rewards_target[i]))
        pl.plot(episodes_target[i][offset:],
                moving_average_rewards_target[i],
                label=print_cute_algo_name(algorithms_target[i]))

    pl.xlabel("Episode $\mathregular{E}$")
    pl.ylabel("Total reward $\mathregular{R(E)}$")
    pl.legend(loc='lower right', prop=fontP, ncol=n_cols)
    # pl.title('Moving average of final reward over episodes')
    pl.grid(True, color='gray', linestyle='dashed')
    pl.tight_layout()
    plt.savefig(target_output_dir + 'mavg_reward_plot' + get_extension())
    plt.show()

    for i in range(0, len(algorithms_target)):
        offset = (len(episodes_target[i]) -
                  len(moving_average_timesteps_target[i]))
        pl.plot(episodes_target[i][offset:],
                moving_average_timesteps_target[i],
                label=print_cute_algo_name(algorithms_target[i]))

    pl.xlabel("Episode $\mathregular{E}$")
    pl.ylabel("Number of time steps $\mathregular{T(E)}$")
    pl.legend(loc='upper right', prop=fontP, ncol=n_cols)
    # pl.title('Moving average of number of time steps over episodes')
    pl.grid(True, color='gray', linestyle='dashed')
    pl.tight_layout()
    plt.savefig(target_output_dir + 'mavg_timesteps_plot' + get_extension())
    plt.show()
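
# Illustrative call: each moving-average series is shorter than its episode
# list (by window_size - 1 entries), which is why the loops above slice the
# episode arrays before plotting. Names below are placeholders:
#
#   plot_multiple_algo_moving_avg(["sarsa", "qlearning"], episodes,
#                                 mavg_rewards, mavg_timesteps, path=1)
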
def compute_avg_reward_per_request_multiple_runs(dates,
                                                 algo,
                                                 show_intermediate_graphs=False
                                                 ):
    """
    Compute the average reward per commands over multiple executions
    Note that multiple executions of the same algorithms are likely to have different numbers of commands sent/timesteps
    This number depends on the single run
    """
    commands = []
    cum_rewards = []
    min_length = -1

    for index, dat in enumerate(dates):
        com, cr, cl = retrieve_reward_per_request_single_run(dat)
        commands.append(com)
        cum_rewards.append(cr)
        if min_length == -1 or cl < min_length:
            min_length = cl
        if show_intermediate_graphs:
            pl.plot(com, cr,
                    label=algo + "-run" + str(index))  # single line

    # average the cumulative rewards across runs, truncated to the shortest run
    avg_cum_reward = []
    avg_commands = []
    for i in range(min_length):
        total_sum = 0.0
        total_cnt = 0.0
        for index, dat in enumerate(dates):
            total_sum += cum_rewards[index][i]
            total_cnt += 1
        avg_cum_reward.append(total_sum / total_cnt)
        avg_commands.append(i)

    if show_intermediate_graphs:
        pl.xlabel("Number of sent commands $\mathregular{n_a}$")
        pl.ylabel("Cumulative reward $\mathregular{C(n_a)}$")
        pl.legend(loc='upper right')
        # pl.title('Cumulative reward over commands for ' + algo)
        pl.grid(True)
        pl.tight_layout()
        plt.savefig('all_commands_' + algo + get_extension())
        plt.show()

    return avg_cum_reward, avg_commands
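
# A more compact numpy equivalent of the truncate-and-average loop above
# (a sketch for illustration, not the repo's code):
def _avg_cum_reward_numpy(cum_rewards):
    import numpy as np
    min_length = min(len(cr) for cr in cum_rewards)
    stacked = np.array([cr[:min_length] for cr in cum_rewards], dtype=float)
    # average across runs, one value per command index 0..min_length-1
    return stacked.mean(axis=0).tolist(), list(range(min_length))
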
def plot_cum_reward_per_command_multiple_algos_for_specified_path(
        rewards, commands, algorithms, path):
    """
    Generate plot with the cumulative reward average over multiple executions
    for all algorithms used for 1 single path
    """
    target_output_dir = build_output_dir_from_path(output_dir, path)

    for index, al in enumerate(algorithms):
        pl.plot(commands[index],
                rewards[index],
                label=print_cute_algo_name(al))  # single line

    pl.xlabel(r'Number of sent commands $\mathregular{n_a}$')
    pl.ylabel(r'Cumulative reward $\mathregular{C(n_a)}$')
    pl.legend(loc='upper left', prop=fontP, ncol=n_cols)
    # pl.title('Cumulative reward over commands for algos')
    pl.grid(True, color='gray', linestyle='dashed')
    pl.tight_layout()
    plt.savefig(target_output_dir + 'all_commands_all_algos' + get_extension())
    plt.show()
def compute_avg_reward_single_algo_multiple_runs(date_array, algorithm=None):
    """
    Compute directly from the output file the average reward per time step for each episode
    """
    x_all = []
    y_all_avg_rewards = []

    # retrieve data for all dates
    for dat in date_array:
        x, y_avg_reward_for_one_episode = read_avg_reward_from_output_file(
            algorithm, dat)

        x_all.append(x)
        y_all_avg_rewards.append(y_avg_reward_for_one_episode)

    fig, ax = plt.subplots()
    for i in range(0, len(x_all)):
        plt.hist(np.sort(y_all_avg_rewards[i]),
                 density=True,
                 cumulative=True,
                 label='CDF-run ' + str(i),
                 bins=2000,
                 histtype='step',
                 alpha=0.8)
        fix_hist_step_vertical_line_at_end(ax)

    plt.xlabel('Reward')
    plt.ylabel('CDF (Episode)')
    plt.legend(loc='lower right', prop=fontP, ncol=n_cols)
    # plt.title('CDF of avg reward per sent command ' + algorithm)
    plt.ylim(0, 1.0)
    plt.grid(True, color='gray', linestyle='dashed')
    plt.tight_layout()
    # plt.savefig('cdf_rewards_multiple_run_' + algorithm + get_extension())
    plt.show()

    # compute average over multiple runs
    y_final_avg_rewards = []

    for array_index in range(0, len(x_all[0])):
        sum_r = 0
        count = 0
        for date_index in range(0, len(date_array)):  # compute average
            sum_r += y_all_avg_rewards[date_index][array_index]
            count += 1
        y_final_avg_rewards.append(sum_r / float(count))

    df_final_avg_over_n_runs = pd.DataFrame({
        'x': x_all[0],
        'y1': y_final_avg_rewards
    })

    i = ["sarsa", "sarsa_lambda", "qlearning",
         "qlearning_lambda"].index(algorithm)

    # plot results
    pl.plot(df_final_avg_over_n_runs['x'],
            df_final_avg_over_n_runs['y1'],
            label="avg over " + str(len(date_array)) + " run")  # avg line

    pl.xlabel('Episodes')
    pl.ylabel('Avg reward obtained per episode')
    pl.legend(loc='lower right', prop=fontP, ncol=n_cols)
    # pl.title('Reward for ' + algorithm + ' algorithm over episodes')
    pl.grid(True, color='gray', linestyle='dashed')
    pl.tight_layout()
    plt.savefig('avg_reward_plot_multiple_runs' + get_extension())
    # plt.show()
    plt.close()

    return algorithm, x_all[0], y_final_avg_rewards, y_all_avg_rewards
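
# The per-episode averaging loop above is a plain mean across runs; assuming
# every run logs the same number of episodes, it reduces to one numpy call:
#
#   y_final_avg_rewards = np.mean(np.array(y_all_avg_rewards), axis=0).tolist()
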
def plot_single_algo_multiple_runs(date_array,
                                   algorithm=None,
                                   path=None,
                                   partial=None):
    """
    Generate plots with reward of a single execution over episodes, average reward and moving average
    reward computed over multiple executions of the same RL algorithm (same values for timesteps)
    """
    target_output_dir = build_output_dir_from_path(output_dir, path, partial)

    window_size = 10

    x_all = []
    y_all_reward = []
    y_all_cum_reward = []
    y_all_timesteps = []

    # the last run's data is kept for the single-run curves plotted below
    x = []
    y_reward = []
    y_cum_reward = []
    y_timesteps = []

    # retrieve data for all dates
    for dat in date_array:
        x, y_reward, y_cum_reward, y_timesteps = read_reward_timesteps_from_output_file(
            algorithm, dat, partial)

        x_all.append(x)
        y_all_reward.append(y_reward)
        y_all_cum_reward.append(y_cum_reward)
        y_all_timesteps.append(y_timesteps)

    # compute average over multiple runs
    y_final_reward, y_final_cum_reward, y_final_timesteps = compute_avg_over_multiple_runs(
        len(x_all[0]), len(date_array), y_all_reward, y_all_cum_reward,
        y_all_timesteps)

    df_single_run = pd.DataFrame({
        'x': x,
        'y1': y_reward,
        'y2': y_timesteps,
        'y3': y_cum_reward
    })
    df_final_avg_over_n_runs = pd.DataFrame({
        'x': x_all[0],
        'y1': y_final_reward,
        'y2': y_final_timesteps,
        'y3': y_final_cum_reward
    })

    # calculate the smoothed moving average
    weights = np.repeat(1.0, window_size) / window_size
    yMA = np.convolve(df_final_avg_over_n_runs['y1'], weights, 'valid')

    # plot results
    pl.plot(df_single_run['x'],
            df_single_run['y1'],
            ':',
            label='1 run',
            color="grey")  # single line
    pl.plot(df_final_avg_over_n_runs['x'],
            df_final_avg_over_n_runs['y1'],
            'k',
            label=str(len(date_array)) + " runs avg")  # avg line
    offset = len(df_final_avg_over_n_runs['x']) - yMA.shape[0]
    pl.plot(df_final_avg_over_n_runs['x'][offset:],
            yMA,
            'r',
            label=str(len(date_array)) + ' runs moving avg')  # moving avg line

    pl.xlabel("Episode $\mathregular{E}$")
    pl.ylabel("Total reward $\mathregular{R(E)}$")
    pl.legend(loc='lower right', prop=fontP, ncol=n_cols)
    # pl.title('Final reward for ' + algorithm + ' algorithm over episodes')
    pl.grid(True, color='gray', linestyle='dashed')
    pl.tight_layout()
    plt.savefig(target_output_dir + 'all_reward_plot_' + algorithm +
                get_extension())
    plt.show()

    yMA_timesteps = np.convolve(df_final_avg_over_n_runs['y2'], weights,
                                'valid')

    # plot results
    pl.plot(df_single_run['x'],
            df_single_run['y2'],
            ':',
            label='1 run',
            color="grey")  # single line
    pl.plot(df_final_avg_over_n_runs['x'],
            df_final_avg_over_n_runs['y2'],
            'k',
            label=str(len(date_array)) + " runs avg")  # avg line
    offset = len(df_final_avg_over_n_runs['x']) - yMA_timesteps.shape[0]
    pl.plot(df_final_avg_over_n_runs['x'][offset:],
            yMA_timesteps,
            'r',
            label=str(len(date_array)) + ' runs moving avg')  # moving avg line

    pl.xlabel("Episode $\mathregular{E}$")
    pl.ylabel("Number of time steps $\mathregular{T(E)}$")
    pl.legend(loc='upper right', prop=fontP, ncol=n_cols)
    # pl.title('Time steps for ' + algorithm + ' algorithm over episodes')
    pl.grid(True, color='gray', linestyle='dashed')
    pl.tight_layout()

    plt.savefig(target_output_dir + 'all_timesteps_plot_' + algorithm +
                get_extension())
    plt.show()

    return algorithm, x, yMA, yMA_timesteps
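
# How the 'valid' convolution used above aligns with the x axis: the smoothed
# series is window_size - 1 elements shorter, so the plots drop the first
# window_size - 1 episodes. A self-contained check on synthetic data:
def _demo_moving_average_alignment(window_size=10):
    import numpy as np
    y = np.arange(100, dtype=float)  # synthetic per-episode reward
    weights = np.repeat(1.0, window_size) / window_size
    y_ma = np.convolve(y, weights, 'valid')
    assert y_ma.shape[0] == y.shape[0] - window_size + 1
    x_aligned = np.arange(y.shape[0])[y.shape[0] - y_ma.shape[0]:]
    return x_aligned, y_ma
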
def plot_multiple_configuration_moving_avg(algorithm, param,
                                           param_values_target,
                                           episodes_target,
                                           moving_average_rewards_target,
                                           moving_average_timesteps_target):
    """
    Generate plots with the moving average of reward and timesteps
    for multiple executions and configurations of a single parameter
    """

    complete_target_dir = build_output_dir_for_params(target_dir, param,
                                                      algorithm)

    for i in range(0, len(param_values_target)):
        # lambda can be 0, so keep the leading zero for the first value;
        # otherwise strip it from the legend and keep only the decimals
        value = param_values_target[i]
        if not (i == 0 and param == "lambda"):
            value = value.lstrip('0')
        offset = (len(episodes_target[i]) -
                  len(moving_average_rewards_target[i]))
        pl.plot(episodes_target[i][offset:],
                moving_average_rewards_target[i],
                label=return_greek_letter(param) + r'$=$' + value)

    pl.xlabel("Episode $\mathregular{E}$")
    pl.ylabel("Total reward $\mathregular{R(E)}$")
    pl.legend(loc='lower right', prop=fontP, ncol=n_cols)
    # pl.title('Moving average of reward over episodes for ' + algorithm)
    pl.grid(True, color='gray', linestyle='dashed')
    pl.tight_layout()
    plt.savefig(complete_target_dir + 'mavg_reward_params' + get_extension())
    plt.show()

    for i in range(0, len(param_values_target)):
        value = param_values_target[i]
        if not (i == 0 and param == "lambda"):
            value = value.lstrip('0')
        offset = (len(episodes_target[i]) -
                  len(moving_average_timesteps_target[i]))
        pl.plot(episodes_target[i][offset:],
                moving_average_timesteps_target[i],
                label=return_greek_letter(param) + r'$=$' + value)

    pl.xlabel("Episode $\mathregular{E}$")
    pl.ylabel("Number of time steps $\mathregular{T(E)}$")
    pl.legend(loc='upper right', prop=fontP, ncol=n_cols)
    # pl.title('Moving average of number of steps over episodes for ' + algorithm)
    pl.grid(True, color='gray', linestyle='dashed')
    pl.tight_layout()
    plt.savefig(complete_target_dir + 'mavg_timesteps_params' +
                get_extension())
    plt.show()