def run_simulations(num_sims,
                    mean_list,
                    variance,
                    step_sizes,
                    outfile_directory,
                    softmax_beta=None,
                    reordering_fn=None,
                    prior_mean=0,
                    forceActions=0):
    '''
    Runs num_sims bandit simulations for each sample size in the list step_sizes.
    Each bandit uses the thompson_ng (normal-gamma Thompson) sampling policy.
    '''

    for i in range(num_sims):
        for num_steps in step_sizes:
            if forceActions != 0:
                # Build a schedule of forced initial pulls for this run
                print("Forcing actions:", forceActions)
                forced = make_forced_actions(len(mean_list), num_steps,
                                             forceActions)
            else:
                # No forcing - use an empty forced_actions object
                forced = forced_actions()
            cur_reward_file = get_rewards_filename(outfile_directory,
                                                   num_steps, i)
            # Accept either a single variance shared by all arms or one variance per arm
            if not hasattr(variance, '__len__'):
                # scalar variance - broadcast it to a list, one entry per arm
                variances = [variance] * len(mean_list)
            else:
                # already a sequence of per-arm variances - pass straight through
                variances = variance
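            # For example (hypothetical values, assuming two arms):
            #   variance=4.0        -> variances == [4.0, 4.0]
            #   variance=[1.0, 2.5] -> variances == [1.0, 2.5]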

            generate_single_bandit.generate_normal_distribution_file(
                mean_list, variances, num_steps, cur_reward_file)
            if softmax_beta is not None:
                # reorder rewards
                reordered_reward_file = get_reordered_rewards_filename(
                    outfile_directory, num_steps, i)
                reorder_samples_in_rewards.reorder_rewards_by_quartile(
                    cur_reward_file, reordered_reward_file, reordering_fn,
                    softmax_beta)
            else:
                reordered_reward_file = cur_reward_file
            cur_output_file = get_output_filename(outfile_directory, num_steps,
                                                  i)
            # One normal-gamma model per arm, initialized with the given prior mean
            models = [
                ng_normal.NGNormal(mu=prior_mean, k=1, alpha=1, beta=1)
                for _ in range(len(mean_list))
            ]
            thompson_ng_policy.calculate_thompson_single_bandit(
                reordered_reward_file,
                num_actions=len(mean_list),
                dest=cur_output_file,
                models=models,
                action_mode=thompson_ng_policy.ActionSelectionMode.prob_is_best,
                relearn=True,
                forced=forced)
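

# Example invocation of run_simulations (a minimal sketch; the argument
# values below are hypothetical and only illustrate the expected shapes):
#
#   run_simulations(num_sims=5,
#                   mean_list=[0.2, 0.5],        # one mean per arm
#                   variance=1.0,                # scalar, broadcast to every arm
#                   step_sizes=[50, 100, 200],   # sample sizes to sweep
#                   outfile_directory='results/thompson',
#                   softmax_beta=None)           # no reward reordering
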
def run_simulations_uniform_random(num_sims,
                                   mean_list,
                                   variance,
                                   steps_before_switch,
                                   steps_after_switch,
                                   outfile_directory,
                                   forceActions=0,
                                   switch_to_best_if_nonsignificant=True):
    '''
    Runs num_sims bandit simulations of steps_before_switch + steps_after_switch
    trials each. Samples uniformly at random (epsilon = 1.0) for the first
    steps_before_switch trials, then switches to a fixed policy.
    '''

    for i in range(num_sims):
        if forceActions != 0:
            # Build a schedule of forced initial pulls for this run
            print("Forcing actions:", forceActions)
            forced = make_forced_actions(len(mean_list), steps_before_switch,
                                         forceActions)
        else:
            # No forcing - use an empty forced_actions object
            forced = forced_actions()
        cur_reward_file = get_rewards_filename(
            outfile_directory, steps_before_switch + steps_after_switch, i)
        # Accept either a single variance shared by all arms or one variance per arm
        if not hasattr(variance, '__len__'):
            # scalar variance - broadcast it to a list, one entry per arm
            variances = [variance] * len(mean_list)
        else:
            # already a sequence of per-arm variances - pass straight through
            variances = variance
        generate_single_bandit.generate_normal_distribution_file(
            mean_list, variances, steps_before_switch + steps_after_switch,
            cur_reward_file)
        cur_output_file = get_output_filename(
            outfile_directory, steps_before_switch + steps_after_switch, i)
        # One normal-gamma model per arm; the prior mean is fixed at 0 here
        # (unlike run_simulations, this function takes no prior_mean argument)
        models = [
            ng_normal.NGNormal(mu=0, k=1, alpha=1, beta=1)
            for _ in range(len(mean_list))
        ]

        thompson_ng_policy.calculate_thompson_switch_to_fixed_policy(
            cur_reward_file,
            num_actions=len(mean_list),
            dest=cur_output_file,
            num_actions_before_switch=steps_before_switch,
            models=models,
            switch_to_best_if_nonsignificant=switch_to_best_if_nonsignificant,
            epsilon=1.0,
            action_mode=thompson_ng_policy.ActionSelectionMode.prob_is_best,
            forced=forced)
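

# Example invocation of run_simulations_uniform_random (a minimal sketch;
# the argument values below are hypothetical):
#
#   run_simulations_uniform_random(num_sims=5,
#                                  mean_list=[0.2, 0.5],
#                                  variance=[1.0, 2.5],   # one variance per arm
#                                  steps_before_switch=100,
#                                  steps_after_switch=100,
#                                  outfile_directory='results/uniform',
#                                  switch_to_best_if_nonsignificant=True)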