def main():
    """Run Bernoulli-reward bandit simulations from ``sys.argv`` and save the analysis.

    Command-line arguments (positions in ``sys.argv``):
        1: Arm specification, one of
           ``"p1,p2"``     arm probabilities; effect size is recorded as 0,
           ``"p1,p2,es"``  arm probabilities followed by the effect size,
           ``"es-center"`` effect size and center; the arm probabilities come
                           from ``get_prob_per_arm_from_effect_size`` and n
                           from a chi-square goodness-of-fit power solve.
        2: Number of simulations.
        3: Output path prefix. The text after its last ``"="`` must be
           ``"<burn_in_size>-<batch_size>"``. File names are appended to it
           by plain string concatenation.
        4: (optional) Bandit type; ``"uniform"`` selects
           ``run_simulations_uniform_random``, anything else
           ``run_simulations``. Defaults to ``"Thompson"``.
        5: (optional) ``"armsHigh"`` / ``"armsLow"`` / anything else, choosing
           the prior used for the non-uniform run.
        6: n used to derive the step sizes; required whenever argument 1
           contains a comma.
        7: (optional) ``"forceActions,<k>"`` or a softmax beta (float) that
           enables reward reordering.
        8: (optional) Reordering-function specifier, only read when
           argument 7 is a softmax beta.

    Outputs, all written under the output prefix: ``Df.pkl``,
    ``DfByTrial.pkl``, ``SummaryText.txt``, ``OverallStatsDf.pkl`` and the
    PDF figures ``HistOfConditionProportions``, ``TestStatOverTime``,
    ``PValueOverTime``, ``PowerOverTime``, ``RewardOverTime`` and
    ``ArmStats``.
    """
    # Manual switch: set to False to skip the simulation runs and only redo
    # the analysis below.
    recalculate_bandits = True
    num_sims = int(sys.argv[2])
    outfile_directory = sys.argv[3]

    # The text after the last "=" encodes "<burn_in_size>-<batch_size>".
    size_fields = outfile_directory.split("=")[-1].split('-')
    burn_in_size, batch_size = int(size_fields[0]), int(size_fields[1])
    print("burn_in_size, batch_size", burn_in_size, batch_size)

    num_arms = 2
    arm_spec = sys.argv[1]
    # If sys.argv[1] has a comma, just use the result as probability per arm.
    if "," in arm_spec:
        numeric_arguments = [float(arm_prob) for arm_prob in arm_spec.split(",")]
        if arm_spec.count(",") == 1:
            # Specifying probability per arm but not effect size.
            prob_per_arm = numeric_arguments
            effect_size = 0  # Note: This will be wrong if arm probs aren't equal!
        else:
            # Specifying probability per arm as first two arguments, and then
            # effect size.
            prob_per_arm = numeric_arguments[:2]
            effect_size = numeric_arguments[2]
        # n cannot be derived from a power calculation in either comma form,
        # so it has to be supplied explicitly for deciding on step sizes.
        n = int(sys.argv[6])
    else:
        # We just need effect size (and center) for this calculation.
        effect_size = float(arm_spec.split("-")[0])
        center = float(arm_spec.split("-")[1])
        prob_per_arm = get_prob_per_arm_from_effect_size(effect_size, center)
        # Assumes we have two arms.
        nobs_total = smp.GofChisquarePower().solve_power(
            effect_size,
            n_bins=(2 - 1) * (2 - 1) + 1,
            alpha=DESIRED_ALPHA,
            power=DESIRED_POWER)
        n = math.ceil(nobs_total)
        # `center` only exists in this branch, so it is reported here.
        print("center", center)

    # These differ from the version for normal because in normal, n
    # represented size for one cond rather than overall size.
    step_sizes = [math.ceil(n / 2), n, 2 * n, 4 * n]
    print("prob_per_arm", prob_per_arm)

    if len(sys.argv) > 7 and sys.argv[7].startswith("forceActions"):
        run_effect_size_simulations.FORCE_ACTIONS = True
        num_to_force = float(sys.argv[7].split(",")[1])
    else:
        num_to_force = 0

    bandit_type = "Thompson"
    bandit_type_prefix = 'BB'
    if len(sys.argv) > 4:
        bandit_type = sys.argv[4]
    if bandit_type == "uniform":
        bandit_type_prefix = "BU"  # Bernoulli rewards, uniform policy

    softmax_beta = None
    reordering_fn = None
    if len(sys.argv) > 7 and not sys.argv[7].startswith("forceActions"):
        # Softmax beta for how to reorder rewards.
        softmax_beta = float(sys.argv[7])
        reordering_fn = reorder_samples_in_rewards.order_by_named_column(
            'Action1OracleActualReward')
        if len(sys.argv) > 8:
            reordering_fn = reorder_samples_in_rewards.get_reordering_fn(
                sys.argv[8])

    prior_params = None
    if recalculate_bandits:
        if bandit_type == "uniform":
            run_simulations_uniform_random(num_sims,
                                           prob_per_arm,
                                           step_sizes,
                                           outfile_directory,
                                           forceActions=num_to_force)
        elif len(sys.argv) > 5:
            if sys.argv[5] == "armsHigh":
                # Arms should be higher than the prior.
                prior_proportion_on_success = min(
                    prob_per_arm) * PRIOR_PROPORTION_DIFFERENCE
            elif sys.argv[5] == "armsLow":
                # Arms should be lower than the prior.
                prior_proportion_on_success = 1 - (
                    1 - max(prob_per_arm)) * PRIOR_PROPORTION_DIFFERENCE
            else:
                # Prior should be uniform (in between arms).
                prior_proportion_on_success = .5
            # Make sure the prior sums to 2, mirroring the successes/failures
            # of uniform prior.
            prior_params = [
                prior_proportion_on_success * 2,
                2 - prior_proportion_on_success * 2
            ]
            print("Prior params: ", prior_params)
            run_simulations(num_sims,
                            prob_per_arm,
                            step_sizes,
                            outfile_directory,
                            prior_params[0],
                            prior_params[1],
                            softmax_beta=softmax_beta,
                            reordering_fn=reordering_fn,
                            forceActions=num_to_force,
                            batch_size=batch_size,
                            burn_in_size=burn_in_size)
        else:
            run_simulations(num_sims,
                            prob_per_arm,
                            step_sizes,
                            outfile_directory,
                            forceActions=num_to_force,
                            batch_size=batch_size,
                            burn_in_size=burn_in_size)

    outfile_prefix = outfile_directory + bandit_type_prefix + str(effect_size)
    if effect_size == 0:
        # Then include the n in the prefix.
        outfile_prefix += "N" + str(n)

    df = calculate_statistics_from_sims(outfile_directory, num_sims,
                                        step_sizes, effect_size,
                                        DESIRED_ALPHA)
    df.to_pickle(outfile_prefix + 'Df.pkl')
    df_by_trial = calculate_by_trial_statistics_from_sims(
        outfile_directory, num_sims, step_sizes, effect_size, DESIRED_ALPHA)
    df_by_trial.to_pickle(outfile_prefix + 'DfByTrial.pkl')

    # Print various stats.
    summary_text = effect_size_sim_output_viz.print_output_stats(
        df,
        prob_per_arm,
        False,
        prior_params=prior_params,
        reordering_info=softmax_beta)
    with open(outfile_prefix + 'SummaryText.txt', 'w', newline='') as outf:
        outf.write(summary_text)
    overall_stats_df = effect_size_sim_output_viz.make_overall_stats_df(
        df, prob_per_arm, False, effect_size)
    overall_stats_df.to_pickle(outfile_prefix + 'OverallStatsDf.pkl')

    # Make histogram.
    hist_figure = effect_size_sim_output_viz.make_hist_of_trials(df)
    hist_figure.savefig(outfile_prefix + 'HistOfConditionProportions.pdf',
                        bbox_inches='tight')

    # Make line plots.
    test_stat_figure = effect_size_sim_output_viz.make_by_trial_graph_of_column(
        df_by_trial, 'stat')
    test_stat_figure.savefig(outfile_prefix + 'TestStatOverTime.pdf',
                             bbox_inches='tight')
    pvalue_figure = effect_size_sim_output_viz.make_by_trial_graph_of_column(
        df_by_trial, 'pvalue')
    pvalue_figure.savefig(outfile_prefix + 'PValueOverTime.pdf',
                          bbox_inches='tight')

    # Plot power.
    power_figure = effect_size_sim_output_viz.plot_power_by_steps(
        df_by_trial, DESIRED_ALPHA, DESIRED_POWER)
    power_figure.savefig(outfile_prefix + 'PowerOverTime.pdf',
                         bbox_inches='tight')

    # Plot reward.
    reward_figure = effect_size_sim_output_viz.make_by_trial_graph_of_column(
        df_by_trial, 'total_reward')
    reward_figure = effect_size_sim_output_viz.add_expected_reward_to_figure(
        reward_figure, prob_per_arm, step_sizes)
    reward_figure.savefig(outfile_prefix + 'RewardOverTime.pdf',
                          bbox_inches='tight')

    # Plot arm statistics (only for the largest step size).
    arm_df_by_trial = create_arm_stats_by_step(outfile_directory, num_sims,
                                               step_sizes[-1], num_arms)
    arm_stats_figure = effect_size_sim_output_viz.make_by_trial_arm_statistics(
        arm_df_by_trial, num_arms)
    arm_stats_figure.savefig(outfile_prefix + 'ArmStats.pdf',
                             bbox_inches='tight')
def mainBinaryRewards():
    """Run binary-reward bandit simulations with an optional switch phase.

    Command-line arguments (positions in ``sys.argv``):
        1: Arm specification: ``"p1,p2"`` (effect size recorded as 0),
           ``"p1,p2,es"``, or a bare effect size from which the arm
           probabilities and n are derived.
        2: Number of simulations.
        3: Output path prefix; file names are appended by plain string
           concatenation.
        4: (optional) Bandit type; ``"uniform"`` selects
           ``run_simulations_uniform_random_binary``. Defaults to
           ``"Thompson"``.
        5: (optional) ``"armsHigh"`` / ``"armsLow"`` / anything else, choosing
           the prior for the non-uniform run.
        6: n; required whenever argument 1 contains a comma.
        7: (optional) ``"forceActions,<k>"`` or a softmax beta (float).
        8: (optional) Reordering-function specifier, or
           ``"numSamples:<before>:<after>"``.
        9: (optional) ``"switchIfNonSig:<true|false>"``.
        10: (optional) ``"multiplier:<int>"``; defaults to 5.

    Writes ``Df.pkl`` under the output prefix and computes the by-trial
    statistics.
    """
    print("Running binary")
    # Manual switch: set to False to skip the simulation runs and only redo
    # the analysis below.
    recalculate_bandits = True
    num_sims = int(sys.argv[2])
    outfile_directory = sys.argv[3]

    arm_spec = sys.argv[1]
    # If sys.argv[1] has a comma, just use the result as probability per arm.
    if "," in arm_spec:
        numeric_arguments = [float(arm_prob) for arm_prob in arm_spec.split(",")]
        if arm_spec.count(",") == 1:
            # Specifying probability per arm but not effect size.
            prob_per_arm = numeric_arguments
            effect_size = 0  # Note: This will be wrong if arm probs aren't equal!
        else:
            # Specifying probability per arm as first two arguments, and then
            # effect size.
            prob_per_arm = numeric_arguments[:2]
            effect_size = numeric_arguments[2]
        # n cannot be derived from a power calculation in either comma form,
        # so it has to be supplied explicitly for deciding on step sizes.
        n = int(sys.argv[6])
    else:
        # We just need effect size for this calculation.
        effect_size = float(arm_spec)
        prob_per_arm = run_effect_size_simulations_beta.get_prob_per_arm_from_effect_size(
            effect_size)
        # Assumes we have two arms.
        nobs_total = smp.GofChisquarePower().solve_power(
            effect_size,
            n_bins=(2 - 1) * (2 - 1) + 1,
            alpha=DESIRED_ALPHA,
            power=DESIRED_POWER)
        n = math.ceil(nobs_total)

    if len(sys.argv) > 7 and sys.argv[7].startswith("forceActions"):
        # NOTE(review): this binds a function-local name only; it does not
        # change any module-level FORCE_ACTIONS flag. Confirm which module's
        # flag (if any) was meant to be set here.
        FORCE_ACTIONS = True
        num_to_force = float(sys.argv[7].split(",")[1])
    else:
        num_to_force = 0

    bandit_type = "Thompson"
    bandit_type_prefix = 'BB'
    if len(sys.argv) > 4:
        bandit_type = sys.argv[4]
    if bandit_type == "uniform":
        bandit_type_prefix = "BU"  # Bernoulli rewards, uniform policy

    softmax_beta = None
    reordering_fn = None
    if len(sys.argv) > 7 and not sys.argv[7].startswith("forceActions"):
        # Softmax beta for how to reorder rewards. Arguments 7/8 may instead
        # carry a different kind of argument (e.g. "numSamples:..."), so a
        # parsing failure is reported and tolerated. Only Exception is caught
        # so that Ctrl-C and SystemExit still propagate.
        try:
            softmax_beta = float(sys.argv[7])
            reordering_fn = reorder_samples_in_rewards.order_by_named_column(
                'Action1OracleActualReward')
            if len(sys.argv) > 8:
                reordering_fn = reorder_samples_in_rewards.get_reordering_fn(
                    sys.argv[8])
        except Exception as parse_error:
            print("Parsing error:", type(parse_error))

    # -1 means "no explicit before/after sample counts were given".
    num_samples_before_switch = -1
    if len(sys.argv) > 8 and sys.argv[8].startswith("numSamples:"):
        num_samples_array = sys.argv[8].split(":")[1:]
        num_samples_before_switch = int(num_samples_array[0])
        num_samples_after_switch = int(num_samples_array[1])

    if len(sys.argv) > 9 and sys.argv[9].startswith("switchIfNonSig:"):
        switch_to_best_if_nonsignificant = (
            sys.argv[9].split(":")[1].lower() == "true")
    else:
        switch_to_best_if_nonsignificant = False

    # n here is what's required for .8 power (number in both conditions).
    step_sizes_before_switch = [int(round(0.25 * n)), int(round(0.5 * n)), n]
    if len(sys.argv) > 10 and sys.argv[10].startswith("multiplier:"):
        multiplier = int(sys.argv[10].split(":")[1])
        print("multiplier:", multiplier)
    else:
        multiplier = 5
    # Total steps = steps before the switch plus `multiplier` times as many
    # after it.
    step_sizes = [(multiplier + 1) * step_size
                  for step_size in step_sizes_before_switch]

    prior_params = None
    if recalculate_bandits:
        if bandit_type == "uniform":
            if num_samples_before_switch > 0:
                # Explicit counts replace the derived step sizes entirely.
                step_sizes = [
                    num_samples_before_switch + num_samples_after_switch
                ]
                run_simulations_uniform_random_binary(
                    num_sims,
                    prob_per_arm,
                    num_samples_before_switch,
                    num_samples_after_switch,
                    outfile_directory,
                    forceActions=num_to_force,
                    switch_to_best_if_nonsignificant=
                    switch_to_best_if_nonsignificant)
            else:
                for num_steps in step_sizes_before_switch:
                    run_simulations_uniform_random_binary(
                        num_sims,
                        prob_per_arm,
                        num_steps,
                        num_steps * multiplier,
                        outfile_directory,
                        forceActions=num_to_force,
                        switch_to_best_if_nonsignificant=
                        switch_to_best_if_nonsignificant)
        elif len(sys.argv) > 5:
            prior_difference = run_effect_size_simulations_beta.PRIOR_PROPORTION_DIFFERENCE
            if sys.argv[5] == "armsHigh":
                # Arms should be higher than the prior.
                prior_proportion_on_success = min(
                    prob_per_arm) * prior_difference
            elif sys.argv[5] == "armsLow":
                # Arms should be lower than the prior.
                prior_proportion_on_success = 1 - (
                    1 - max(prob_per_arm)) * prior_difference
            else:
                # Prior should be uniform (in between arms).
                prior_proportion_on_success = .5
            # Make sure the prior sums to 2, mirroring the successes/failures
            # of uniform prior.
            prior_params = [
                prior_proportion_on_success * 2,
                2 - prior_proportion_on_success * 2
            ]
            print("Prior params: ", prior_params)
            run_effect_size_simulations_beta.run_simulations(
                num_sims,
                prob_per_arm,
                step_sizes,
                outfile_directory,
                prior_params[0],
                prior_params[1],
                softmax_beta=softmax_beta,
                reordering_fn=reordering_fn,
                forceActions=num_to_force)
        else:
            run_effect_size_simulations_beta.run_simulations(
                num_sims,
                prob_per_arm,
                step_sizes,
                outfile_directory,
                forceActions=num_to_force)

    outfile_prefix = outfile_directory + bandit_type_prefix + str(effect_size)
    if effect_size == 0:
        # Then include the n in the prefix.
        outfile_prefix += "N" + str(n)

    df = calculate_statistics_from_sims(outfile_directory,
                                        num_sims,
                                        step_sizes,
                                        effect_size,
                                        switch_to_best_if_nonsignificant,
                                        step_sizes_before_switch,
                                        DESIRED_ALPHA,
                                        is_binary=True)
    df.to_pickle(outfile_prefix + 'Df.pkl')
    # The by-trial statistics are computed but not saved by this function.
    df_by_trial = calculate_by_trial_statistics_from_sims(
        outfile_directory, num_sims, step_sizes, effect_size, DESIRED_ALPHA)
def main():
    """Run Bernoulli-reward bandit simulations (epsilon variant) and pickle the statistics.

    Command-line arguments (positions in ``sys.argv``):
        1: Arm specification: ``"p1,p2"`` (effect size recorded as 0),
           ``"p1,p2,es"``, or ``"es-center"``.
        2: Number of simulations.
        3: Output path prefix. The text after its last ``"="`` must be
           ``"<burn_in_size>-<batch_size>"``. For non-uniform bandits it must
           also contain ``"epsilon=<value>"`` as (part of) a path component.
        4: (optional) Bandit type; ``"uniform"`` runs ``run_simulations`` with
           ``mode='uniform'``. Defaults to ``"Thompson"``.
        5: (optional) ``"armsHigh"`` / ``"armsLow"`` / anything else, choosing
           the prior for the non-uniform run.
        6: n; required whenever argument 1 contains a comma.
        7: (optional) ``"forceActions,<k>"`` or a softmax beta (float).
        8: (optional) Reordering-function specifier.

    Writes ``Df_sim=<num_sims>_m=<m>_r=<r>.pkl`` under the output prefix and,
    when at most two simulations are run, one CSV per result DataFrame.

    Raises:
        ValueError: If a non-uniform bandit is requested but the output path
            carries no ``"epsilon="`` value.
    """
    start_time = time.time()
    outfile_directory = sys.argv[3]
    random_dur_m = 0
    random_start_r = 0
    # Manual switch: set to False to skip the simulation runs.
    recalculate_bandits = True
    num_arms = 2

    # epsilon is encoded in the output path; it stays None when absent so the
    # non-uniform branch below can fail with a clear message instead of a
    # NameError.
    epsilon = None
    if "epsilon=" in outfile_directory:
        print(outfile_directory)
        epsilon = float(
            outfile_directory.split("epsilon=")[-1].split("/")[0].strip("="))
        print("epsilon", epsilon)

    num_sims = int(sys.argv[2])
    # The text after the last "=" encodes "<burn_in_size>-<batch_size>".
    size_fields = outfile_directory.split("=")[-1].split('-')
    burn_in_size, batch_size = int(size_fields[0]), int(size_fields[1])
    print("burn_in_size, batch_size", burn_in_size, batch_size)

    arm_spec = sys.argv[1]
    # If sys.argv[1] has a comma, just use the result as probability per arm.
    if "," in arm_spec:
        numeric_arguments = [float(arm_prob) for arm_prob in arm_spec.split(",")]
        if arm_spec.count(",") == 1:
            # Specifying probability per arm but not effect size.
            prob_per_arm = numeric_arguments
            effect_size = 0  # Note: This will be wrong if arm probs aren't equal!
        else:
            # Specifying probability per arm as first two arguments, and then
            # effect size.
            prob_per_arm = numeric_arguments[:2]
            effect_size = numeric_arguments[2]
        # n cannot be derived from a power calculation in either comma form,
        # so it has to be supplied explicitly for deciding on step sizes.
        n = int(sys.argv[6])
    else:
        # We just need effect size (and center) for this calculation.
        effect_size = float(arm_spec.split("-")[0])
        center = float(arm_spec.split("-")[1])
        prob_per_arm = get_prob_per_arm_from_effect_size(effect_size, center)
        # Assumes we have two arms.
        nobs_total = smp.GofChisquarePower().solve_power(
            effect_size,
            n_bins=(2 - 1) * (2 - 1) + 1,
            alpha=DESIRED_ALPHA,
            power=DESIRED_POWER)
        n = math.ceil(nobs_total)
        # `center` only exists in this branch, so it is reported here.
        print("center", center)

    # The full schedule used elsewhere is
    # [math.ceil(n/2), n, 2*n, 4*n] (n is the overall size, unlike the normal
    # version where n is the size of one condition).
    # Arghavan: only the single largest horizon, 4 * n, is simulated here.
    step_sizes = [4 * n]
    print("prob_per_arm", prob_per_arm)

    if len(sys.argv) > 7 and sys.argv[7].startswith("forceActions"):
        run_effect_size_simulations.FORCE_ACTIONS = True
        num_to_force = float(sys.argv[7].split(",")[1])
    else:
        num_to_force = 1  # force one action from each arm to avoid nan means

    bandit_type = "Thompson"
    bandit_type_prefix = 'BB'
    if len(sys.argv) > 4:
        bandit_type = sys.argv[4]
    if bandit_type == "uniform":
        bandit_type_prefix = "BU"  # Bernoulli rewards, uniform policy

    softmax_beta = None
    reordering_fn = None
    if len(sys.argv) > 7 and not sys.argv[7].startswith("forceActions"):
        # Softmax beta for how to reorder rewards.
        softmax_beta = float(sys.argv[7])
        reordering_fn = reorder_samples_in_rewards.order_by_named_column(
            'Action1OracleActualReward')
        if len(sys.argv) > 8:
            reordering_fn = reorder_samples_in_rewards.get_reordering_fn(
                sys.argv[8])

    prior_params = None
    if recalculate_bandits:
        if bandit_type == "uniform":
            results_dfs_list, results_output_names = run_simulations(
                num_sims,
                prob_per_arm,
                step_sizes,
                outfile_directory,
                forceActions=num_to_force,
                mode='uniform')
        else:
            if epsilon is None:
                raise ValueError(
                    "The output path (sys.argv[3]) must contain "
                    "'epsilon=<value>' for non-uniform bandits; got %r" %
                    outfile_directory)
            if len(sys.argv) > 5:
                if sys.argv[5] == "armsHigh":
                    # Arms should be higher than the prior.
                    prior_proportion_on_success = min(
                        prob_per_arm) * PRIOR_PROPORTION_DIFFERENCE
                elif sys.argv[5] == "armsLow":
                    # Arms should be lower than the prior.
                    prior_proportion_on_success = 1 - (
                        1 - max(prob_per_arm)) * PRIOR_PROPORTION_DIFFERENCE
                else:
                    # Prior should be uniform (in between arms).
                    prior_proportion_on_success = .5
                # Make sure the prior sums to 2, mirroring the
                # successes/failures of uniform prior.
                prior_params = [
                    prior_proportion_on_success * 2,
                    2 - prior_proportion_on_success * 2
                ]
                print("Prior params: ", prior_params)
                results_dfs_list, results_output_names = run_simulations(
                    num_sims,
                    prob_per_arm,
                    step_sizes,
                    outfile_directory,
                    prior_params[0],
                    prior_params[1],
                    softmax_beta=softmax_beta,
                    reordering_fn=reordering_fn,
                    forceActions=num_to_force,
                    batch_size=batch_size,
                    burn_in_size=burn_in_size,
                    random_dur=random_dur_m,
                    random_start=random_start_r,
                    epsilon=epsilon)
            else:
                results_dfs_list, results_output_names = run_simulations(
                    num_sims,
                    prob_per_arm,
                    step_sizes,
                    outfile_directory,
                    forceActions=num_to_force,
                    batch_size=batch_size,
                    burn_in_size=burn_in_size,
                    epsilon=epsilon)

    outfile_prefix = outfile_directory + bandit_type_prefix + str(effect_size)
    if effect_size == 0:
        # Then include the n in the prefix.
        outfile_prefix += "N" + str(n)

    for results_df, results_output_name in zip(results_dfs_list,
                                               results_output_names):
        results_df['SampleNumber'] = results_df.index
        # Per-sample CSVs are only written for very small runs.
        if num_sims <= 2:
            results_df.to_csv('{}_sims={}_m={}.csv'.format(
                results_output_name, num_sims, random_dur_m),
                              index=False)

    stats_df = calculate_statistics_from_sims(results_dfs_list,
                                              effect_size,
                                              num_arms,
                                              alpha=0.05)
    stats_df.to_pickle(outfile_prefix + 'Df_sim={}_m={}_r={}.pkl'.format(
        num_sims, random_dur_m, random_start_r))

    end_time = time.time()
    print('Execution time = %.6f seconds' % (end_time - start_time))
def main():
    """Run normal-reward bandit simulations from ``sys.argv`` and save the analysis.

    Command-line arguments (positions in ``sys.argv``):
        1, 2: Arm specifications, each parsed by
           ``get_mean_and_prior_from_string`` into (arm mean, prior mean).
        3: Interpreted by case:
           * arm means equal: the variance to use for the arms (float);
           * argument 8 starts with ``"fixedVariance"``: comma-separated
             variances;
           * otherwise: the effect size, from which the variance and n are
             derived.
        4: Number of simulations.
        5: Output path prefix; file names are appended by plain string
           concatenation.
        6: (optional) Bandit type; ``"uniform"`` selects
           ``run_simulations_uniform_random``. Defaults to ``"Thompson"``.
        7: n; required when the arm means are equal or the variance is fixed.
        8: (optional) ``"forceActions,<k>"``, ``"fixedVariance..."``, or a
           softmax beta (float) that enables reward reordering.
        9: (optional) Reordering-function specifier, only read when
           argument 8 is a softmax beta.

    Outputs, all written under the output prefix: ``Df.pkl``,
    ``DfByTrial.pkl``, ``SummaryText.txt``, ``OverallStatsDf.pkl`` and the
    PDF figures ``HistOfConditionProportions``, ``TestStatOverTime``,
    ``PValueOverTime``, ``PowerOverTime``, ``RewardOverTime`` and
    ``ArmStats``.

    Exits with status 1 if the two arms specify different priors (and the
    second prior is non-zero), which is not implemented.
    """
    # Manual switch: set to False to skip the simulation runs and only redo
    # the analysis below.
    recalculate_bandits = True
    mean1, mu1 = get_mean_and_prior_from_string(sys.argv[1])
    mean2, mu2 = get_mean_and_prior_from_string(sys.argv[2])
    if mu1 != mu2 and mu2 != 0:
        print("Error: different priors on the arms aren't implemented for normal bandits.")
        # sys.exit with a non-zero status: the bare exit() helper is provided
        # by the site module for interactive use and reported success.
        sys.exit(1)
    means = [mean1, mean2]

    if mean1 == mean2:
        # Effect size must be 0 - interpret third argument as the variance to
        # use for the arms.
        variance = float(sys.argv[3])
        # n, for basing number of steps off of, also has to be set.
        n = int(sys.argv[7])
        # Equal arm means indicates there's no effect.
        effect_size = 0
    elif len(sys.argv) > 8 and sys.argv[8].startswith("fixedVariance"):
        # We're running a simulation based on an existing experiment, so we
        # want to set the variance manually.
        variance = [float(num) for num in sys.argv[3].split(",")]
        # n, for basing number of steps off of, also has to be set.
        n = int(sys.argv[7])
        # TODO: fix! The arm means differ in this branch, so 0 is only a
        # placeholder and not the true effect size.
        effect_size = 0
    else:
        effect_size = float(sys.argv[3])
        variance = get_var_from_effect_size(mean1, mean2, effect_size)
        nobs1 = statsmodels.stats.power.tt_ind_solve_power(
            effect_size, None, DESIRED_ALPHA, DESIRED_POWER, 1)
        n = math.ceil(nobs1)

    num_sims = int(sys.argv[4])
    outfile_directory = sys.argv[5]

    bandit_type = "Thompson"
    bandit_type_prefix = 'NG'
    if len(sys.argv) > 6:
        bandit_type = sys.argv[6]
    if bandit_type == "uniform":
        bandit_type_prefix = "NU"  # Normal rewards, uniform policy

    if len(sys.argv) > 8 and sys.argv[8].startswith("forceActions"):
        # NOTE(review): this binds a function-local name only; it does not
        # change any module-level FORCE_ACTIONS flag. Confirm whether a
        # `global FORCE_ACTIONS` declaration was intended.
        FORCE_ACTIONS = True
        num_to_force = float(sys.argv[8].split(",")[1])
    else:
        num_to_force = 0

    reorder_rewards = False
    softmax_beta = None
    if (len(sys.argv) > 8
            and not sys.argv[8].startswith("forceActions")
            and not sys.argv[8].startswith("fixedVariance")):
        # Softmax beta for how to reorder rewards.
        reorder_rewards = True
        softmax_beta = float(sys.argv[8])
        reordering_fn = reorder_samples_in_rewards.order_by_named_column(
            'Action1OracleActualReward')
        if len(sys.argv) > 9:
            reordering_fn = reorder_samples_in_rewards.get_reordering_fn(
                sys.argv[9])

    num_arms = 2
    step_sizes = [n, 2 * n, 4 * n, 8 * n]

    if recalculate_bandits:
        if bandit_type == "uniform":
            run_simulations_uniform_random(num_sims, means, variance,
                                           step_sizes, outfile_directory,
                                           forceActions=num_to_force)
        elif reorder_rewards:
            run_simulations(num_sims, means, variance, step_sizes,
                            outfile_directory, softmax_beta, reordering_fn,
                            prior_mean=mu1, forceActions=num_to_force)
        else:
            run_simulations(num_sims, means, variance, step_sizes,
                            outfile_directory, prior_mean=mu1,
                            forceActions=num_to_force)

    outfile_prefix = outfile_directory + bandit_type_prefix + str(effect_size)
    if effect_size == 0:
        # Then include the n and the arm variance in the prefix.
        outfile_prefix += "N" + str(n) + "Var" + str(variance)

    df = calculate_statistics_from_sims(outfile_directory, num_sims,
                                        step_sizes, effect_size,
                                        DESIRED_ALPHA)
    df.to_pickle(outfile_prefix + 'Df.pkl')
    df_by_trial = calculate_by_trial_statistics_from_sims(
        outfile_directory, num_sims, step_sizes, effect_size, DESIRED_ALPHA)
    df_by_trial.to_pickle(outfile_prefix + 'DfByTrial.pkl')

    # Print various stats.
    summary_text = effect_size_sim_output_viz.print_output_stats(
        df, means + [variance], True, effect_size,
        reordering_info=softmax_beta)
    with open(outfile_prefix + 'SummaryText.txt', 'w', newline='') as outf:
        outf.write(summary_text)
    overall_stats_df = effect_size_sim_output_viz.make_overall_stats_df(
        df, means + [variance], True, effect_size)
    overall_stats_df.to_pickle(outfile_prefix + 'OverallStatsDf.pkl')

    # Make histogram.
    hist_figure = effect_size_sim_output_viz.make_hist_of_trials(df)
    hist_figure.savefig(outfile_prefix + 'HistOfConditionProportions.pdf',
                        bbox_inches='tight')

    # Make line plots.
    test_stat_figure = effect_size_sim_output_viz.make_by_trial_graph_of_column(
        df_by_trial, 'stat')
    test_stat_figure.savefig(outfile_prefix + 'TestStatOverTime.pdf',
                             bbox_inches='tight')
    pvalue_figure = effect_size_sim_output_viz.make_by_trial_graph_of_column(
        df_by_trial, 'pvalue')
    pvalue_figure.savefig(outfile_prefix + 'PValueOverTime.pdf',
                          bbox_inches='tight')

    # Plot power. Saved under the full outfile_prefix like every other
    # output, so zero-effect runs with different n/variance no longer
    # overwrite each other's power plot.
    power_figure = effect_size_sim_output_viz.plot_power_by_steps(
        df_by_trial, DESIRED_ALPHA, DESIRED_POWER)
    power_figure.savefig(outfile_prefix + 'PowerOverTime.pdf',
                         bbox_inches='tight')

    # Plot reward.
    reward_figure = effect_size_sim_output_viz.make_by_trial_graph_of_column(
        df_by_trial, 'total_reward')
    reward_figure = effect_size_sim_output_viz.add_expected_reward_to_figure(
        reward_figure, means, step_sizes)
    reward_figure.savefig(outfile_prefix + 'RewardOverTime.pdf',
                          bbox_inches='tight')

    # Plot arm statistics (only for the largest step size).
    arm_df_by_trial = create_arm_stats_by_step(outfile_directory, num_sims,
                                               step_sizes[-1], num_arms)
    arm_stats_figure = effect_size_sim_output_viz.make_by_trial_arm_statistics(
        arm_df_by_trial, num_arms)
    arm_stats_figure.savefig(outfile_prefix + 'ArmStats.pdf',
                             bbox_inches='tight')