Example #1
from plotutils import plot_data_frame


def simulation_plot_results(input_data_frame):
    #plot and save the figure: 1
    file_name = "network_switching"
    plot_data_frame(input_data_frame,
                    xlabel="Node Number",
                    ylabel="Accumulated switching counts",
                    huelabel='Algorithms',
                    flag_semilogx=False,
                    save_file_name=file_name,
                    save_data_name=None)

    #plot and save the figure: 2
    file_name = "network_collision"
    plot_data_frame(input_data_frame,
                    xlabel="Node Number",
                    ylabel="Accumulated collision counts",
                    huelabel='Algorithms',
                    flag_semilogx=False,
                    save_file_name=file_name,
                    save_data_name=None)

    file_name = "network_rewards"
    plot_data_frame(input_data_frame,
                    xlabel="Node Number",
                    ylabel="Sum of rewards",
                    huelabel='Algorithms',
                    flag_semilogx=False,
                    save_file_name=file_name,
                    save_data_name=None)
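
A minimal usage sketch for the function above (assuming, as in the repeated-game results of Example #2, that the column names of the input data frame match the axis labels passed to plot_data_frame; the algorithm names and numbers below are placeholders):

if __name__ == "__main__":
    import pandas as pd

    # placeholder results: two algorithms evaluated on three network sizes
    example_results = pd.DataFrame({
        "Node Number": [10, 20, 30, 10, 20, 30],
        "Accumulated switching counts": [5, 12, 21, 4, 9, 15],
        "Accumulated collision counts": [3, 8, 14, 2, 6, 11],
        "Sum of rewards": [95.0, 180.0, 260.0, 98.0, 190.0, 275.0],
        "Algorithms": ["Alg-A"] * 3 + ["Alg-B"] * 3,
    })
    simulation_plot_results(example_results)
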
Example #2
import time
import datetime

import numpy as np
import pandas as pd

from plotutils import plot_data_frame, plot_repeated_simu_results
# AlgEvaluator is provided by the simulation framework; its import path
# depends on the project layout.


def simulation_execution(game_config):
    """
    simulation_execution() is the main body of the MP-MAB algorithm simulations.
    """
    print("MAB game with configuration '{}' starts to play...".format(
        game_config.__repr__()))

    game_horizon = game_config.game_horizon
    alg_engine = AlgEvaluator(game_config.env_config)

    #add algorithms
    for alg_id in range(len(game_config.alg_types)):
        alg_engine.add_algorithm(
            algo_type=game_config.alg_types[alg_id],
            custome_params=game_config.alg_configs[alg_id])

    print("MAB game prepares the environment for arm type '{}' of {} rounds".
          format(game_config.env_config['env_type'], game_horizon))
    alg_engine.prepare_arm_samples()

    # simulation 1: reward plotting to compare the efficiency of the algorithms
    if "enable_efficiency_simulation" in game_config.__dict__ and game_config.enable_efficiency_simulation:
        start_time_oneshot = time.time()

        #######################################################################
        #
        if not game_config.flag_parallel:
            #            print("starting single-process simulation...")
            alg_engine.play_game(
                flag_progress_bar=game_config.flag_progress_bar)
        else:
            #            print("starting parallel simulation...")
            alg_engine.play_game_parallel(
                flag_progress_bar=game_config.flag_progress_bar)
        #
        #######################################################################

        alg_engine.plot_rewards(save_fig=game_config.flag_save_figure,
                                save_data=game_config.save_data)

        # printing
        running_time = time.time() - start_time_oneshot
        print("Single-shot simulation completes in {} for {} iterations.".format( \
                datetime.timedelta(seconds=running_time), game_horizon))

    # simulation 2/3/4: plotting regret or total rewards over horizon
    if ("enable_regret_simulation" in game_config.__dict__ and game_config.enable_regret_simulation) or \
       ("enable_reward_simulation" in game_config.__dict__ and game_config.enable_reward_simulation) or \
       ("enable_switching_simulation" in game_config.__dict__ and game_config.enable_switching_simulation):
        start = game_config.T_start
        nb_point = game_config.T_step

        horizon_list = np.exp(
            np.linspace(np.log(start), np.log(game_horizon), nb_point))
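        # -> nb_point evaluation horizons, log-spaced from T_start up to the full game horizon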
        simu_rounds = game_config.T_simu_rounds

        start_time_repeated = time.time()

        #######################################################################
        #
        if not game_config.flag_parallel:
            #            print("starting single-process simulation...")
            simulation_results = alg_engine.play_repeated_game(
                horizon_list,
                simulation_rounds=simu_rounds,
                flag_progress_bar=game_config.flag_progress_bar)
        else:
            #            print("starting parallel simulation...")
            simulation_results = alg_engine.play_repeated_game_parallel(
                horizon_list,
                simulation_rounds=simu_rounds,
                flag_progress_bar=game_config.flag_progress_bar)
        #
        #######################################################################

        # printing
        running_time = time.time() - start_time_repeated
        print("Repeated simulation completes in {} with maximum horizon {} in {} rounds of plays...".format(\
              datetime.timedelta(seconds=running_time), game_horizon, simu_rounds))

        # visualization for simulation 2
        if "enable_regret_simulation" in game_config.__dict__ and game_config.enable_regret_simulation:
            # the reference (optimal) algorithm is assumed to be the first one added
            optimal_alg_id = 0

            len_horizon = simulation_results['horizon'].shape[1]
            time_series = np.empty((0, len_horizon))
            alg_indicator_series = []

            avg_regret_series = np.empty((0, len_horizon))
            for alg_id in range(len(simulation_results['algorithm_name'])):
                if alg_id != optimal_alg_id:
                    # the returned value simulation_results['reward_series'] is organized as an array:
                    # (len(algorithm_ids), simulation_rounds*len(horizon_list))
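                    # average regret per play: (reference cumulative reward - this
                    # algorithm's cumulative reward), normalized by the horizon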
                    horizon_series = simulation_results['horizon'][alg_id, :]
                    avg_regret = (
                        simulation_results['reward_series'][optimal_alg_id, :]
                        - simulation_results['reward_series'][alg_id, :]
                    ) / horizon_series

                    avg_regret_series = np.append(avg_regret_series,
                                                  avg_regret)  # flatten
                    time_series = np.append(time_series, horizon_series)

                    alg_indicator_series.extend(
                        [simulation_results['algorithm_name'][alg_id]] *
                        len(horizon_series))

            prepared_results = {}
            prepared_results['Average regret'] = avg_regret_series
            prepared_results['Total number of plays'] = time_series
            prepared_results['Algorithms'] = alg_indicator_series

            simu_data_frame = pd.DataFrame(prepared_results)

            # plot and save the figure
            file_name = "monte_carlo_regret" if game_config.flag_save_figure == True else None
            sns_figure_unused, repeated_play_data_name = plot_data_frame(
                simu_data_frame,
                xlabel="Total number of plays",
                ylabel="Average regret",
                huelabel='Algorithms',
                save_file_name=file_name,
                save_data_name=game_config.repeated_play_data_name)

            # post-processing: add the theoretical bound to the figure
            flag_bound = getattr(game_config, 'flag_regret_bound', False)

            plot_repeated_simu_results(start=start,
                                       horzion=game_horizon,
                                       nbPoints=nb_point,
                                       flag_bound=flag_bound,
                                       data_file_name=repeated_play_data_name)

        # visualization for simulation 3
        if "enable_reward_simulation" in game_config.__dict__ and game_config.enable_reward_simulation:
            len_horizon = simulation_results['horizon'].shape[1]
            time_series = np.empty((0, len_horizon))
            alg_indicator_series = []

            reward_series = np.array([])
            for alg_id in range(len(simulation_results['algorithm_name'])):
                horizon_series = simulation_results['horizon'][alg_id, :]
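                # per-play average: cumulative reward divided by the horizon of the run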
                avg_rewards = simulation_results['reward_series'][
                    alg_id, :] / horizon_series

                reward_series = np.append(reward_series,
                                          avg_rewards)  # flatten
                time_series = np.append(time_series, horizon_series)
                alg_indicator_series.extend(
                    [simulation_results['algorithm_name'][alg_id]] *
                    len(horizon_series))

            prepared_results = {}
            prepared_results['Average sum of rewards'] = reward_series
            prepared_results['Total number of plays'] = time_series
            prepared_results['Algorithms'] = alg_indicator_series

            simu_data_frame = pd.DataFrame(prepared_results)

            #plot and save the figure
            file_name = "monte_carlo_rewards" if game_config.flag_save_figure == True else None
            plot_data_frame(simu_data_frame,
                            xlabel="Total number of plays",
                            ylabel="Average sum of rewards",
                            huelabel='Algorithms',
                            flag_semilogx=False,
                            save_file_name=file_name,
                            save_data_name=game_config.repeated_play_data_name)

        # visualization for simulation 4
        if "enable_switching_simulation" in game_config.__dict__ and game_config.enable_switching_simulation:
            len_horizon = simulation_results['horizon'].shape[1]
            time_series = np.empty((0, len_horizon))
            alg_indicator_series = []

            switching_series = np.array([])
            collision_series = np.array([])

            for alg_id in range(len(simulation_results['algorithm_name'])):
                horizon_series = simulation_results['horizon'][alg_id, :]
                switching = simulation_results['switching_count_series'][
                    alg_id, :]
                collisions = simulation_results['collision_series'][alg_id, :]

                switching_series = np.append(switching_series,
                                             switching)  # flatten
                collision_series = np.append(collision_series,
                                             collisions)  # flatten

                time_series = np.append(time_series, horizon_series)
                alg_indicator_series.extend(
                    [simulation_results['algorithm_name'][alg_id]] *
                    len(horizon_series))

            prepared_results = {}
            prepared_results['Accumulated switching counts'] = switching_series
            prepared_results['Accumulated collision counts'] = collision_series
            prepared_results['Total number of plays'] = time_series
            prepared_results['Algorithms'] = alg_indicator_series

            assert len(switching_series) == len(
                collision_series
            ), "switching and collision series must have the same length: {}, {}".format(
                len(switching_series), len(collision_series))

            simu_data_frame = pd.DataFrame(prepared_results)

            #plot and save the figure: 1
            file_name = "monte_carlo_switching" if game_config.flag_save_figure == True else None
            plot_data_frame(simu_data_frame,
                            xlabel="Total number of plays",
                            ylabel="Accumulated switching counts",
                            huelabel='Algorithms',
                            flag_semilogx=False,
                            save_file_name=file_name,
                            save_data_name=game_config.repeated_play_data_name)

            #plot and save the figure: 2
            file_name = "monte_carlo_collision" if game_config.flag_save_figure == True else None
            plot_data_frame(simu_data_frame,
                            xlabel="Total number of plays",
                            ylabel="Accumulated collision counts",
                            huelabel='Algorithms',
                            flag_semilogx=False,
                            save_file_name=file_name,
                            save_data_name=game_config.repeated_play_data_name)
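
A hedged driver sketch for simulation_execution(); the field names mirror the attributes read above, while the env_type string, the algorithm name, and its parameters are placeholders that must match what the framework's AlgEvaluator actually accepts:

if __name__ == "__main__":
    from types import SimpleNamespace

    # illustrative configuration; every value below is a placeholder
    example_config = SimpleNamespace(
        game_horizon=10000,
        env_config={'env_type': 'HetNet simulator'},  # consumed by AlgEvaluator
        alg_types=['some_algorithm'],                 # hypothetical algorithm identifier
        alg_configs=[None],                           # per-algorithm parameters (or None)
        flag_parallel=False,
        flag_progress_bar=True,
        flag_save_figure=False,
        save_data=False,
        enable_efficiency_simulation=True,
    )
    simulation_execution(example_config)
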
Example #3
import sys
import os.path
sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))

import pandas as pd
from plotutils import plot_data_frame, plot_repeated_simu_results

data_reward = pd.read_pickle(
    'reward_data_4_alg_HetNet--2020-03-27-11-22-00.pkl')

plot_data_frame(data_reward,
                xlabel="Total number of plays",
                ylabel="Average sum of rewards",
                huelabel='Algorithms',
                flag_semilogx=False,
                save_file_name=None,
                save_data_name=None)

data_reward = pd.read_pickle(
    'reward_data_4_alg_HetNet--2020-03-27-11-22-03.pkl')

plot_data_frame(data_reward,
                xlabel="Total number of plays",
                ylabel="Accumulated switching counts",
                huelabel='Algorithms',
                flag_semilogx=False,
                save_file_name=None,
                save_data_name=None)