def sim_fixed(muA, muB, sample_size, n_experiments, sim_data=None,
              relative_mde_value=0.02, toc_adj_factor=0.00025, fn=None):
    """Score every simulated experiment once with the EL test at full sample size.

    For each experiment the values stored at index ``sample_size - 1`` of
    both arms are passed to ``el_core.calc_stat``. A parameter banner and a
    one-line summary are written through ``helper.fprint``: the type 1 error
    rate when ``muA == muB``, the power otherwise.

    Args:
        muA: Mean of arm A handed to ``helper.sim_binomial_seq``.
        muB: Mean of arm B handed to ``helper.sim_binomial_seq``.
        sample_size: Number of samples per arm used for the single evaluation.
        n_experiments: Number of simulated experiments.
        sim_data: Pre-generated simulation data indexed as
            ``sim_data[experiment][arm][sample]``; generated here when None.
            Presumably running (cumulative) counts - confirm against
            ``helper.sim_binomial_seq``.
        relative_mde_value: Forwarded unchanged to ``el_core.calc_stat``.
        toc_adj_factor: Forwarded unchanged to ``el_core.calc_stat``.
        fn: Forwarded to ``helper.fprint`` as its second argument.

    Returns:
        dict with ``'el_res'`` (per-experiment outcome labels) and
        ``'el_lift'`` (per-experiment lift reported by ``calc_stat``).
    """

    def _mean_abs(values):
        # np.mean([]) yields nan *and* a RuntimeWarning; return nan quietly.
        return np.mean(np.abs(values)) if values else float('nan')

    if sim_data is None:
        sim_data = helper.sim_binomial_seq([muA, muB],
                                           sample_size=sample_size,
                                           n_experiments=n_experiments)

    el_res = ['U'] * n_experiments
    el_lift = [0] * n_experiments
    last = sample_size - 1
    for s in range(n_experiments):
        stat = el_core.calc_stat(sim_data[s][0][last], sample_size,
                                 sim_data[s][1][last], sample_size,
                                 relative_mde_value, toc_adj_factor)
        el_res[s] = stat['el_res']
        el_lift[s] = stat['lift']
    res = pd.Series(el_res).value_counts().to_dict()

    # The original formatted an undefined name ``toc`` here; report the
    # factor that was actually used, labelled as in the peeking simulation.
    helper.fprint(
        "[Parameters]: muA: {}, muB: {}, sample_size: {:,}, n_experiments: {:,},"
        " relative_mde_value: {}, toc_adj_factor: {:.5f}".format(
            muA, muB, sample_size, n_experiments, relative_mde_value,
            toc_adj_factor), fn)

    if muA == muB:
        # No true difference: any declared winner is a false positive.
        winner_lift = [el_lift[i] for i, r in enumerate(el_res)
                       if r in ('A', 'B')]
        helper.fprint(
            "[EL] Fixed - Type 1 error rate: {:.4f}, error_avg_lift: {:.5f}, el_res: {}"
            .format(1 - (res.get('U', 0) + res.get('E', 0)) / n_experiments,
                    _mean_abs(winner_lift), res), fn)
    else:
        winner = 'A' if muA > muB else 'B'
        winner_lift = [el_lift[i] for i, r in enumerate(el_res)
                       if r == winner]
        helper.fprint(
            "[EL] Fixed - Power: {:.4f}, avg_lift: {:.5f}, el_res: {}".format(
                res.get(winner, 0) / n_experiments,
                _mean_abs(winner_lift), res), fn)
    return {'el_res': el_res, 'el_lift': el_lift}
def sim_fixed(muA, muB, sample_size, n_experiments, sim_data=None, alpha=0.05, fn=None):
    """Run one t-test per simulated experiment on the complete samples.

    An experiment is labelled 'A' or 'B' (whichever arm reports the higher
    ``cvr``) when ``ttest_core.calc_stat`` returns ``p_value < alpha``;
    otherwise it stays 'U'. The parameters and either the type 1 error rate
    (``muA == muB``) or the power are written through ``helper.fprint``.

    Args:
        muA, muB: Means handed to ``helper.sim_binomial_seq``.
        sample_size: Samples per arm used when data has to be generated.
        n_experiments: Number of simulated experiments.
        sim_data: Optional data indexed ``sim_data[experiment][arm]``;
            generated with ``cumsum=False`` when None.
        alpha: Significance threshold; also forwarded to ``calc_stat``.
        fn: Forwarded to ``helper.fprint`` as its second argument.

    Returns:
        dict with ``'ttest_res'`` (labels) and ``'ttest_effect'`` (effects).
    """
    if sim_data is None:
        sim_data = helper.sim_binomial_seq(
            [muA, muB],
            sample_size=sample_size,
            n_experiments=n_experiments,
            cumsum=False)

    ttest_res = ['U' for _ in range(n_experiments)]
    ttest_effect = [0 for _ in range(n_experiments)]
    for idx in range(n_experiments):
        arm_a = sim_data[idx][0]
        arm_b = sim_data[idx][1]
        stat = ttest_core.calc_stat(arm_a, arm_b, alpha)
        ttest_effect[idx] = stat['effect']
        if not (stat['p_value'] < alpha):
            continue
        ttest_res[idx] = 'A' if stat['cvr_a'] > stat['cvr_b'] else 'B'

    banner = "[Parameters]: muA: {}, muB: {}, sample_size: {:,}, n_experiments: {:,}"
    helper.fprint(banner.format(muA, muB, sample_size, n_experiments), fn)

    tally = pd.Series(ttest_res).value_counts().to_dict()
    if muA == muB:
        # Equal arms: every declared winner is a false positive.
        picked = [ttest_effect[i] for i, label in enumerate(ttest_res)
                  if label in ('A', 'B')]
        undecided = tally.get('U', 0) + tally.get('E', 0)
        message = ("[T-Test] Fixed - Type 1 error rate: {:.4f}, "
                   "error_avg_effect: {:.5f}, ttest_res: {}")
        helper.fprint(
            message.format(1 - undecided / n_experiments,
                           np.mean(abs(np.array(picked))), tally), fn)
    else:
        winner = 'A' if muA > muB else 'B'
        picked = [ttest_effect[i] for i, label in enumerate(ttest_res)
                  if label == winner]
        message = "[T-Test] Fixed - Power: {:.4f}, avg_effect: {:.5f}, ttest_res: {}"
        helper.fprint(
            message.format(tally.get(winner, 0) / n_experiments,
                           np.mean(abs(np.array(picked))), tally), fn)

    return {'ttest_res': ttest_res, 'ttest_effect': ttest_effect}
def sim_fixed(muA, muB, sample_size, n_experiments, sim_data=None,
              relative_mde_value=0.02, alpha=0.05, tau_option=3,
              tau_constant=0.0001, fn=None):
    """Score every simulated experiment once with mSPRT at full sample size.

    For each experiment the values stored at index ``sample_size - 1`` of
    both arms are passed to ``msprt_core.calc_stat``. A parameter banner and
    a one-line summary (type 1 error rate when ``muA == muB``, power
    otherwise) are written through ``helper.fprint``.

    The original body referenced ``fn``, ``tau_option`` and ``tau_constant``
    without defining them; they are now trailing keyword parameters whose
    defaults mirror the mSPRT peeking simulation, so existing positional
    callers are unaffected.

    Args:
        muA, muB: Means handed to ``helper.sim_binomial_seq``.
        sample_size: Number of samples per arm used for the evaluation.
        n_experiments: Number of simulated experiments.
        sim_data: Optional data indexed ``sim_data[experiment][arm][sample]``;
            generated here when None.
        relative_mde_value: Forwarded unchanged to ``calc_stat``.
        alpha: Forwarded unchanged to ``calc_stat``.
        tau_option: Forwarded unchanged to ``calc_stat``.
        tau_constant: Forwarded unchanged to ``calc_stat``.
        fn: Forwarded to ``helper.fprint`` as its second argument.

    Returns:
        dict with ``'msprt_res'`` (labels) and ``'msprt_effect'`` (effects).
    """

    def _mean_abs(values):
        # np.mean([]) yields nan *and* a RuntimeWarning; return nan quietly.
        return np.mean(np.abs(values)) if values else float('nan')

    if sim_data is None:
        sim_data = helper.sim_binomial_seq([muA, muB],
                                           sample_size=sample_size,
                                           n_experiments=n_experiments)

    msprt_res = ['U'] * n_experiments
    msprt_effect = [0] * n_experiments
    last = sample_size - 1
    for s in range(n_experiments):
        # NOTE(review): this argument list is kept from the original; the
        # peeking simulation calls calc_stat with a different signature
        # (alpha, previous p-value, previous CI) - confirm which is current.
        stat = msprt_core.calc_stat(sim_data[s][0][last], sample_size,
                                    sim_data[s][1][last], sample_size,
                                    relative_mde_value, alpha,
                                    tau_option, tau_constant)
        msprt_res[s] = stat['msprt_res']
        msprt_effect[s] = stat['effect']
    res = pd.Series(msprt_res).value_counts().to_dict()

    # The original banner printed relative_mde_value under the label
    # "min_msprt_half_width" (derived from an undefined msprt_mde_ratio).
    helper.fprint(
        "[Parameters]: muA:{}, muB:{}, sample_size:{:,}, n_experiments:{:,},"
        " relative_mde_value:{:.5f}, alpha:{:.3f}, tau_option:{}, tau_constant:{}"
        .format(muA, muB, sample_size, n_experiments, relative_mde_value,
                alpha, tau_option, tau_constant), fn)

    if muA == muB:
        # No true difference: any declared winner is a false positive.
        winner_effect = [msprt_effect[i] for i, r in enumerate(msprt_res)
                         if r in ('A', 'B')]
        helper.fprint(
            "[mSPRT] Fixed - Type 1 error rate:{:.4f}, error_avg_effect:{:.5f}, msprt_res:{}"
            .format(1 - (res.get('U', 0) + res.get('E', 0)) / n_experiments,
                    _mean_abs(winner_effect), res), fn)
    else:
        winner = 'A' if muA > muB else 'B'
        winner_effect = [msprt_effect[i] for i, r in enumerate(msprt_res)
                         if r == winner]
        helper.fprint(
            "[mSPRT] Fixed - Power:{:.4f}, avg_effect:{:.5f}, msprt_res:{}"
            .format(res.get(winner, 0) / n_experiments,
                    _mean_abs(winner_effect), res), fn)
    return {'msprt_res': msprt_res, 'msprt_effect': msprt_effect}
def sim_peeking(muA, muB, sample_size, n_experiments, n_peeks=-1, start=None,
                alpha=0.05, relative_mde_value=0.02, burnIn=300,
                random_seed=-1, pr_peek=1, pr_fixed=False, fn=None):
    """Simulate repeated peeking at a t-test across many experiments.

    At every peek each still-undecided ('U') experiment is re-tested on its
    first ``n`` samples with ``ttest_core.calc_stat``. An experiment is
    labelled 'A' or 'B' as soon as ``p_value < alpha`` and is not tested
    again. Progress and a final summary (type 1 error rate when
    ``muA == muB``, power otherwise) are written through ``helper.fprint``.

    Peek schedule:
        * ``start`` float in (0, 1): first peek at that fraction of the
          sample size returned by ``helper.sample_size``.
        * ``start`` int in [1, sample_size]: first peek at that sample count.
        * ``start`` None: first peek at ``sample_size / n_peeks`` (or 1).
        * ``n_peeks == -1`` peeks after every sample; otherwise the step is
          chosen so that about ``n_peeks`` peeks fit up to ``sample_size``.

    Args:
        muA, muB: Means handed to ``helper.sim_binomial_seq``.
        sample_size: Maximum number of samples per arm.
        n_experiments: Number of simulated experiments.
        n_peeks: Number of peeks, or -1 to peek at every sample.
        start: First peek position (see above).
        alpha: Significance threshold; also forwarded to ``calc_stat``.
        relative_mde_value: Forwarded to ``helper.sample_size``.
        burnIn: Forwarded unchanged to ``calc_stat``.
        random_seed: Seeds ``np.random`` when greater than 0.
        pr_peek: Print every ``pr_peek``-th peek; values <= 0 disable it.
        pr_fixed: When true, also run ``sim_fixed`` on the same data.
        fn: Forwarded to ``helper.fprint`` as its second argument.

    Returns:
        dict with ``'sim_res_df'`` (one row per peek), ``'ttest_res'`` and
        ``'ttest_effect'``.
    """

    def _mean(values):
        # np.mean([]) yields nan *and* a RuntimeWarning; return nan quietly.
        return np.mean(values) if len(values) else float('nan')

    def _mean_abs(values):
        return np.mean(np.abs(values)) if len(values) else float('nan')

    if random_seed > 0:
        np.random.seed(random_seed)
    start_time = time.time()
    helper.fprint("[Start time]: {}".format(
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())), fn)

    ss = helper.sample_size(muA, relative_mde_value=relative_mde_value)
    freq_sample_size = ss['variant_sample_size']

    if isinstance(start, float):
        assert (0 < start and start < 1)
        start = int(freq_sample_size * start)
        step = 1 if n_peeks == -1 else int((sample_size - start) / (n_peeks - 1))
    elif isinstance(start, int):
        assert (1 <= start and start <= sample_size)
        step = 1 if n_peeks == -1 else int((sample_size - start) / (n_peeks - 1))
    elif n_peeks == -1:
        start = 1
        step = 1
    else:
        start = int(sample_size / n_peeks)
        step = start

    # The same banner is printed before and after the simulation.
    param_line = (
        "[Parameters]: muA: {}, muB: {}, sample_size: {:,}, n_experiments: {:,},"
        " relative_mde_value:{:.5f}, alpha: {:.3f}, burnIn:{:,}, random_seed:{},"
        " n_peeks: {:,}, start: {:,}, step: {:,}"
        .format(muA, muB, sample_size, n_experiments, relative_mde_value,
                alpha, burnIn, random_seed, n_peeks, start, step))
    helper.fprint(param_line, fn)

    sim_data = helper.sim_binomial_seq([muA, muB], sample_size=sample_size,
                                       n_experiments=n_experiments,
                                       cumsum=False)

    sim_res_data = []
    ttest_res = ['U'] * n_experiments
    ttest_ss = [0] * n_experiments      # sample count at the last evaluation
    ttest_effect = [0] * n_experiments

    peek_idx = 0
    for n in range(start, sample_size + 1, step):
        peek_idx += 1
        peek_effect = []
        for s in range(n_experiments):
            if ttest_res[s] != 'U':
                continue  # already concluded at an earlier peek
            ret = ttest_core.calc_stat(sim_data[s][0][:n], sim_data[s][1][:n],
                                       alpha, burnIn)
            cvr_a = ret['cvr_a']
            cvr_b = ret['cvr_b']
            effect = ret['effect']
            ttest_res[s] = ret['ttest_res']
            ttest_ss[s] = n
            ttest_effect[s] = effect
            # Record each evaluated effect exactly once; the original
            # appended it a second time further down.
            peek_effect.append(effect)
            if ret['p_value'] < alpha:
                ttest_res[s] = 'A' if cvr_a > cvr_b else 'B'

        res = pd.Series(ttest_res).value_counts().to_dict()
        peek_avg_effect = _mean_abs(peek_effect)

        sim_res_data_dict = {}
        sim_res_data_dict.update(res)
        sim_res_data_dict['peek_id'] = peek_idx
        sim_res_data_dict['samples'] = n
        sim_res_data_dict['avg_effect'] = peek_avg_effect
        sim_res_data.append(sim_res_data_dict)

        if pr_peek > 0 and peek_idx % pr_peek == 0:
            helper.fprint(
                "[{}]: Peek #{} @ {:,} samples, avg_effect: {:.5f}, ttest_res: {}"
                .format(time.strftime("%m-%d %H:%M:%S", time.localtime()),
                        peek_idx, n, peek_avg_effect, res), fn)

        if 'U' not in ttest_res:
            break

    helper.fprint(param_line, fn)

    # Recomputed so the summary does not depend on a loop-local variable
    # (which would be undefined if the peek range were empty).
    res = pd.Series(ttest_res).value_counts().to_dict()

    if muA == muB:
        # No true difference: any declared winner is a false positive.
        winner_effect = [ttest_effect[i] for i, r in enumerate(ttest_res)
                         if r in ('A', 'B')]
        helper.fprint(
            "[T-Test] {:,} peeks - Type 1 error rate: {:.4f},"
            "error_avg_effect: {:.5f}, ttest_res: {}"
            .format(peek_idx,
                    1 - (res.get('U', 0) + res.get('E', 0)) / n_experiments,
                    _mean_abs(winner_effect), res), fn)
    else:
        winner = 'A' if muA > muB else 'B'
        hits = [i for i, r in enumerate(ttest_res) if r == winner]
        winner_ss = [ttest_ss[i] for i in hits]
        winner_effect = [ttest_effect[i] for i in hits]
        avg_ss = _mean(winner_ss)
        helper.fprint(
            "[T-Test] {:,} peeks - Power: {:.4f}, avg_sample_size: {:,.0f}, sample_size_ratio: {:.2f}%,"
            " avg_effect: {:.5f}, ttest_res: {}"
            .format(peek_idx, res.get(winner, 0) / n_experiments, avg_ss,
                    avg_ss / freq_sample_size * 100,
                    _mean_abs(winner_effect), res), fn)

    if pr_fixed:
        # Pass fn along so the fixed-sample summary reaches the same log
        # target as everything else printed by this function.
        sim_fixed(muA, muB, sample_size, n_experiments, sim_data, alpha, fn)

    helper.fprint("[Elasped time]: {}".format(
        str(timedelta(seconds=time.time() - start_time))), fn)
    helper.fprint("[End time]: {}".format(
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())), fn)

    return {'sim_res_df': pd.DataFrame(sim_res_data),
            'ttest_res': ttest_res,
            'ttest_effect': ttest_effect}
def sim_peeking(muA, muB, sample_size, n_experiments, n_peeks=-1, start=None,
                relative_mde_value=0.02, rope_hdi_ratio=0.6,
                rope_mde_ratio=0.375, hdi_cred_mass=0.95, rvs_size=1000000,
                pr_peeking=True, pr_fixed=False, fn=None):
    """Simulate repeated peeking with the ROPE decision rule.

    At each peek every experiment still labelled 'U' is re-evaluated by
    ``rope_core.calc_stat`` on the values stored at index ``n - 1`` of both
    arms; experiments that leave 'U' are not evaluated again. Per-peek
    aggregates are collected into a DataFrame and a final summary (type 1
    error rate when ``muA == muB``, power otherwise) is written through
    ``helper.fprint``.

    Args:
        muA, muB: Means handed to ``helper.sim_binomial_seq``.
        sample_size: Maximum number of samples per arm.
        n_experiments: Number of simulated experiments.
        n_peeks: Number of peeks, or -1 to peek at every sample.
        start: First peek: a float in (0, 1) is a fraction of the sample
            size returned by ``helper.sample_size``; an int in
            [1, sample_size] is an absolute count; None derives it from
            ``n_peeks``.
        relative_mde_value, rope_hdi_ratio, rope_mde_ratio, hdi_cred_mass,
        rvs_size: Forwarded unchanged to ``rope_core.calc_stat``.
        pr_peeking: When true, print one line per peek.
        pr_fixed: When true, also run ``sim_fixed`` on the same data.
        fn: Forwarded to ``helper.fprint`` as its second argument.

    Returns:
        dict with ``'sim_res_df'`` (one row per peek), ``'rope_res'`` and
        ``'rope_lift'``.
    """
    long_stamp = "%Y-%m-%d %H:%M:%S"
    start_time = time.time()
    helper.fprint(
        "[Start time]: {}".format(time.strftime(long_stamp, time.localtime())),
        fn)

    freq_sample_size = helper.sample_size(
        muA, mde_value=relative_mde_value,
        relative_mde=True)['variant_sample_size']

    # Resolve the peek schedule (first peek and stride).
    peek_everywhere = (n_peeks == -1)
    if isinstance(start, float):
        assert (0 < start and start < 1)
        start = int(freq_sample_size * start)
        step = 1 if peek_everywhere else int((sample_size - start) / (n_peeks - 1))
    elif isinstance(start, int):
        assert (1 <= start and start <= sample_size)
        step = 1 if peek_everywhere else int((sample_size - start) / (n_peeks - 1))
    elif peek_everywhere:
        start = step = 1
    else:
        start = step = int(sample_size / n_peeks)

    absolute_mde_value = muA * relative_mde_value
    min_rope_half_width = absolute_mde_value * rope_mde_ratio

    # Identical banner is emitted before and after the simulation.
    banner = (
        "[Parameters]: muA: {}, muB: {}, sample_size: {:,}, n_experiments: {:,},"
        " relative_mde_value: {}, rope_hdi_ratio: {:.2f}, rope_mde_ratio: {:.3f},"
        " min_rope_half_width: {:.5f}, n_peeks: {:,}, start: {:,}, step: {:,}, rvs_size: {:,}"
    ).format(muA, muB, sample_size, n_experiments, relative_mde_value,
             rope_hdi_ratio, rope_mde_ratio, min_rope_half_width,
             n_peeks, start, step, rvs_size)
    helper.fprint(banner, fn)

    sim_data = helper.sim_binomial_seq([muA, muB],
                                       sample_size=sample_size,
                                       n_experiments=n_experiments)

    sim_res_data = []
    rope_res = ['U' for _ in range(n_experiments)]
    rope_ss = [0 for _ in range(n_experiments)]    # samples at last evaluation
    rope_lift = [0 for _ in range(n_experiments)]

    peek_idx = 0
    for n in range(start, sample_size + 1, step):
        peek_idx += 1
        hdi_widths = []
        rope_widths = []
        decided_lifts = []
        for s in range(n_experiments):
            if rope_res[s] != 'U':
                continue
            stat = rope_core.calc_stat(
                sim_data[s][0][n - 1], n, sim_data[s][1][n - 1], n,
                relative_mde_value, rope_hdi_ratio, rope_mde_ratio,
                hdi_cred_mass, rvs_size)
            verdict = stat['rope_res']
            rope_res[s] = verdict
            rope_ss[s] = n
            rope_lift[s] = stat['lift']
            hdi_widths.append(stat['lift_hdi_width'])
            rope_widths.append(stat['rope_width'])
            if verdict != 'U':
                decided_lifts.append(stat['lift'])

        tally = pd.Series(rope_res).value_counts().to_dict()
        peek_avg_hdi_width = np.mean(hdi_widths)
        peek_avg_rope_width = np.mean(rope_widths)
        peek_avg_lift = np.mean(abs(np.array(decided_lifts)))

        row = dict(tally)
        row['peek_id'] = peek_idx
        row['samples'] = n
        row['avg_hdi_width'] = peek_avg_hdi_width
        row['avg_rope_width'] = peek_avg_rope_width
        row['avg_lift'] = peek_avg_lift
        sim_res_data.append(row)

        if pr_peeking:
            helper.fprint(
                ("[{}]: Peek #{:,} @ {:,} samples, avg_hdi_width: {:.5f}, avg_rope_width: {:.5f},"
                 " avg_lift: {:.5f}, rope_res: {}").format(
                    time.strftime("%m-%d %H:%M:%S", time.localtime()),
                    peek_idx, n, peek_avg_hdi_width, peek_avg_rope_width,
                    peek_avg_lift, tally), fn)

        if 'U' not in rope_res:
            break

    helper.fprint(banner, fn)

    if muA == muB:
        # Equal arms: every declared winner is a false positive.
        winner_lift = [rope_lift[i] for i, label in enumerate(rope_res)
                       if label in ('A', 'B')]
        not_wrong = tally.get('U', 0) + tally.get('E', 0)
        helper.fprint(
            "[ROPE] {:,} peeks - Type 1 error rate: {:.4f}, error_avg_lift: {:.5f}, rope_res: {}"
            .format(peek_idx, 1 - not_wrong / n_experiments,
                    np.mean(abs(np.array(winner_lift))), tally), fn)
    else:
        winner = 'A' if muA > muB else 'B'
        winner_ss = []
        winner_lift = []
        for i, label in enumerate(rope_res):
            if label == winner:
                winner_ss.append(rope_ss[i])
                winner_lift.append(rope_lift[i])
        helper.fprint(
            ("[ROPE] {:,} peeks - Power: {:.4f}, avg_sample_size: {:,.0f},"
             " sample_size_ratio: {:.2f}%,"
             " avg_lift: {:.5f}, rope_res: {}").format(
                peek_idx, tally.get(winner, 0) / n_experiments,
                np.mean(winner_ss),
                np.mean(winner_ss) / freq_sample_size * 100,
                np.mean(abs(np.array(winner_lift))), tally), fn)

    if pr_fixed:
        # Fixed-sample comparison on the very same simulated data.
        sim_fixed(muA, muB, sample_size, n_experiments, sim_data,
                  relative_mde_value, rope_hdi_ratio, rope_mde_ratio,
                  hdi_cred_mass, rvs_size, fn)

    elapsed = timedelta(seconds=time.time() - start_time)
    helper.fprint("[Elasped time]: {}".format(str(elapsed)), fn)
    helper.fprint(
        "[End time]: {}".format(time.strftime(long_stamp, time.localtime())),
        fn)

    return {
        'sim_res_df': pd.DataFrame(sim_res_data),
        'rope_res': rope_res,
        'rope_lift': rope_lift,
    }
def sim_fixed(muA, muB, sample_size, n_experiments, sim_data=None,
              relative_mde_value=0.02, rope_hdi_ratio=0.6,
              rope_mde_ratio=0.375, hdi_cred_mass=0.95, rvs_size=1000000,
              fn=None):
    """Apply the ROPE decision rule once per experiment at full sample size.

    Each experiment is evaluated by ``rope_core.calc_stat`` on the values
    stored at index ``sample_size - 1`` of both arms. The parameters and
    either the type 1 error rate (``muA == muB``) or the power are written
    through ``helper.fprint``.

    Args:
        muA, muB: Means handed to ``helper.sim_binomial_seq``.
        sample_size: Number of samples per arm used for the evaluation.
        n_experiments: Number of simulated experiments.
        sim_data: Optional data indexed ``sim_data[experiment][arm][sample]``;
            generated here when None.
        relative_mde_value, rope_hdi_ratio, rope_mde_ratio, hdi_cred_mass,
        rvs_size: Forwarded unchanged to ``rope_core.calc_stat``.
        fn: Forwarded to ``helper.fprint`` as its second argument.

    Returns:
        dict with ``'rope_res'`` (labels) and ``'rope_lift'`` (lifts).
    """
    if sim_data is None:
        sim_data = helper.sim_binomial_seq(
            [muA, muB], sample_size=sample_size, n_experiments=n_experiments)

    rope_res = ['U' for _ in range(n_experiments)]
    rope_lift = [0 for _ in range(n_experiments)]
    final_idx = sample_size - 1
    for k in range(n_experiments):
        count_a = sim_data[k][0][final_idx]
        count_b = sim_data[k][1][final_idx]
        stat = rope_core.calc_stat(
            count_a, sample_size, count_b, sample_size,
            relative_mde_value, rope_hdi_ratio, rope_mde_ratio,
            hdi_cred_mass, rvs_size)
        rope_res[k] = stat['rope_res']
        rope_lift[k] = stat['lift']

    tally = pd.Series(rope_res).value_counts().to_dict()

    min_rope_half_width = (muA * relative_mde_value) * rope_mde_ratio
    banner = (
        "[Parameters]: muA: {}, muB: {}, sample_size: {:,}, n_experiments: {:,},"
        " relative_mde_value: {}, rope_hdi_ratio: {}, rope_mde_ratio: {},"
        " min_rope_half_width: {:.6f}, rvs_size: {:,}")
    helper.fprint(
        banner.format(muA, muB, sample_size, n_experiments,
                      relative_mde_value, rope_hdi_ratio, rope_mde_ratio,
                      min_rope_half_width, rvs_size), fn)

    if muA == muB:
        # Equal arms: every declared winner is a false positive.
        chosen = [rope_lift[i] for i, label in enumerate(rope_res)
                  if label in ('A', 'B')]
        not_wrong = tally.get('U', 0) + tally.get('E', 0)
        helper.fprint(
            "[ROPE] Fixed - Type 1 error rate: {:.4f}, error_avg_lift: {:.5f}, rope_res: {}"
            .format(1 - not_wrong / n_experiments,
                    np.mean(abs(np.array(chosen))), tally), fn)
    else:
        winner = 'A' if muA > muB else 'B'
        chosen = [rope_lift[i] for i, label in enumerate(rope_res)
                  if label == winner]
        helper.fprint(
            "[ROPE] Fixed - Power: {:.4f}, avg_lift: {:.5f}, rope_res: {}"
            .format(tally.get(winner, 0) / n_experiments,
                    np.mean(abs(np.array(chosen))), tally), fn)

    return {'rope_res': rope_res, 'rope_lift': rope_lift}
def sim_peeking(muA, muB, sample_size, n_experiments, n_peeks=-1, start=None,
                alpha=0.05, relative_mde_value=0.02, tau_option=3,
                tau_constant=0.0001, burnIn=0, random_seed=-1, pr_peek=1,
                pr_fixed=False, fn=None):
    """Simulate repeated peeking with the mSPRT decision rule.

    At each peek every experiment still labelled 'U' is re-evaluated by
    ``msprt_core.calc_stat`` on the values stored at index ``n - 1`` of both
    arms, together with that experiment's previous p-value and effect CI.
    Experiments that leave 'U' are not evaluated again. Per-peek aggregates
    are collected into a DataFrame and a final summary (type 1 error rate
    when ``muA == muB``, power otherwise) is written via ``helper.fprint``.

    Args:
        muA, muB: Means handed to ``helper.sim_binomial_seq``.
        sample_size: Maximum number of samples per arm.
        n_experiments: Number of simulated experiments.
        n_peeks: Number of peeks, or -1 to peek at every sample.
        start: First peek: a float in (0, 1) is a fraction of the sample
            size returned by ``helper.sample_size``; an int in
            [1, sample_size] is an absolute count; None derives it from
            ``n_peeks``.
        alpha: Forwarded unchanged to ``calc_stat``.
        relative_mde_value: Forwarded to ``helper.sample_size`` and to the
            optional ``sim_fixed`` run.
        tau_option, tau_constant, burnIn: Only reported in the parameter
            banner by this function.
        random_seed: Seeds ``np.random`` when greater than 0.
        pr_peek: Print every ``pr_peek``-th peek; values <= 0 disable it.
        pr_fixed: When true, also run ``sim_fixed`` on the same data.
        fn: Forwarded to ``helper.fprint`` as its second argument.

    Returns:
        dict with ``'sim_res_df'`` (one row per peek), ``'msprt_res'`` and
        ``'msprt_effect'``.
    """

    def _mean(values):
        # np.mean([]) yields nan *and* a RuntimeWarning; return nan quietly.
        return np.mean(values) if len(values) else float('nan')

    def _mean_abs(values, empty=float('nan')):
        return np.mean(np.abs(values)) if len(values) else empty

    if random_seed > 0:
        np.random.seed(random_seed)
    start_time = time.time()
    helper.fprint(
        "[Start time]:{}".format(
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())), fn)

    ss = helper.sample_size(muA, relative_mde_value=relative_mde_value)
    freq_sample_size = ss['variant_sample_size']

    if isinstance(start, float):
        assert (0 < start and start < 1)
        start = int(freq_sample_size * start)
        step = 1 if n_peeks == -1 else int((sample_size - start) / (n_peeks - 1))
    elif isinstance(start, int):
        assert (1 <= start and start <= sample_size)
        step = 1 if n_peeks == -1 else int((sample_size - start) / (n_peeks - 1))
    elif n_peeks == -1:
        start = 1
        step = 1
    else:
        start = int(sample_size / n_peeks)
        step = start

    param_args = (muA, muB, sample_size, n_experiments, relative_mde_value,
                  alpha, tau_option, tau_constant, burnIn, random_seed,
                  n_peeks, start, step)
    helper.fprint(
        "[Parameters]: muA:{}, muB:{}, sample_size:{:,}, n_experiments:{:,},"
        " relative_mde_value:{:.5f}, alpha:{:.3f}, tau_option:{}, tau_constant:{},"
        " burnIn:{:,}, random_seed:{}, n_peeks:{:,}, start:{:,}, step: {:,}"
        .format(*param_args), fn)

    sim_data = helper.sim_binomial_seq([muA, muB],
                                       sample_size=sample_size,
                                       n_experiments=n_experiments)

    sim_res_data = []
    msprt_res = ['U'] * n_experiments
    msprt_ss = [0] * n_experiments      # sample count at the last evaluation
    msprt_effect = [0] * n_experiments
    msprt_prev_p_val = [1] * n_experiments
    # One independent list per experiment: ``[[...]] * n`` would alias a
    # single list, so an in-place update inside calc_stat would leak into
    # every experiment.
    msprt_prev_ci = [[-1000, 1000] for _ in range(n_experiments)]

    peek_idx = 0
    n = 0  # keeps the summary printable even if the peek range is empty
    for n in range(start, sample_size + 1, step):
        peek_idx += 1
        peek_sig_effect = []
        peek_effect = []
        peek_decision_boundary = []
        for s in range(n_experiments):
            if msprt_res[s] != 'U':
                continue  # already concluded at an earlier peek
            stat = msprt_core.calc_stat(sim_data[s][0][n - 1], n,
                                        sim_data[s][1][n - 1], n, alpha,
                                        msprt_prev_p_val[s], msprt_prev_ci[s])
            msprt_res[s] = stat['msprt_res']
            msprt_ss[s] = n
            msprt_effect[s] = stat['effect']
            msprt_prev_p_val[s] = stat['p_value']
            msprt_prev_ci[s] = stat['effect_ci']
            if stat['msprt_res'] != 'U':
                peek_sig_effect.append(stat['effect'])
            peek_effect.append(stat['effect'])
            peek_decision_boundary.append(stat['decision_boundary'])

        res = pd.Series(msprt_res).value_counts().to_dict()
        # Per-peek averages fall back to 0 when nothing was collected.
        peek_avg_sig_effect = _mean_abs(peek_sig_effect, 0)
        peek_avg_effect = _mean_abs(peek_effect, 0)
        peek_avg_decision_boundary = _mean_abs(peek_decision_boundary, 0)

        sim_res_data_dict = {}
        sim_res_data_dict.update(res)
        sim_res_data_dict['peek_id'] = peek_idx
        sim_res_data_dict['samples'] = n
        sim_res_data_dict['decision_boundary'] = peek_avg_decision_boundary
        sim_res_data_dict['avg_sig_effect'] = peek_avg_sig_effect
        sim_res_data_dict['avg_effect'] = peek_avg_effect
        sim_res_data.append(sim_res_data_dict)

        if pr_peek > 0 and peek_idx % pr_peek == 0:
            helper.fprint(
                "[{}]: Peek #{:,} @ {:,} samples,"
                " avg_sig_effect:{:.5f},"
                " avg_effect:{:.5f}, msprt_res:{}".format(
                    time.strftime("%m-%d %H:%M:%S", time.localtime()),
                    peek_idx, n, peek_avg_sig_effect, peek_avg_effect, res),
                fn)

        if 'U' not in msprt_res:
            break

    helper.fprint(
        "[Parameters]: muA:{}, muB:{}, sample_size:{:,}, n_experiments:{:,},"
        " relative_mde_value:{:.5f}, alpha:{:.3f}, tau_option:{}, tau_constant:{},"
        " burnIn:{:,}, random_seed:{}, n_peeks:{:,}, start:{:,}, step:{:,}"
        .format(*param_args), fn)

    # Recomputed so the summary does not depend on a loop-local variable.
    res = pd.Series(msprt_res).value_counts().to_dict()

    if muA == muB:
        # No true difference: any declared winner is a false positive.
        winner_effect = [msprt_effect[i] for i, r in enumerate(msprt_res)
                         if r in ('A', 'B')]
        helper.fprint(
            "[mSPRT][{:,} peeks][{:,} samples] - Type 1 error rate:{:.4f},"
            " error_avg_effect:{:.5f}, msprt_res:{}".format(
                peek_idx, n,
                1 - (res.get('U', 0) + res.get('E', 0)) / n_experiments,
                _mean_abs(winner_effect), res), fn)
    else:
        winner = 'A' if muA > muB else 'B'
        hits = [i for i, r in enumerate(msprt_res) if r == winner]
        winner_ss = [msprt_ss[i] for i in hits]
        winner_effect = [msprt_effect[i] for i in hits]
        avg_ss = _mean(winner_ss)
        helper.fprint(
            "[mSPRT][{:,} peeks][{:,} samples] - Power:{:.4f}, avg_sample_size:{:,.0f},"
            " sample_size_ratio:{:.2f}%, avg_effect:{:.5f}, msprt_res:{}"
            .format(peek_idx, n, res.get(winner, 0) / n_experiments, avg_ss,
                    avg_ss / freq_sample_size * 100,
                    _mean_abs(winner_effect), res), fn)

    if pr_fixed:
        # Fixed-sample comparison on the very same simulated data.
        sim_fixed(muA, muB, sample_size, n_experiments, sim_data,
                  relative_mde_value, alpha)

    helper.fprint(
        "[Elasped time]:{}".format(
            str(timedelta(seconds=time.time() - start_time))), fn)
    helper.fprint(
        "[End time]:{}".format(
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())), fn)

    return {
        'sim_res_df': pd.DataFrame(sim_res_data),
        'msprt_res': msprt_res,
        'msprt_effect': msprt_effect
    }
def sim_peeking(muA, muB, sample_size, n_experiments, n_peeks=-1, start=None,
                relative_mde_value=0.02, toc_adj_factor=0.00025,
                pr_peeking=True, pr_fixed=False, fn=None):
    """Simulate repeated peeking with the EL decision rule.

    At each peek every experiment still labelled 'U' is re-evaluated by
    ``el_core.calc_stat`` on the values stored at index ``n - 1`` of both
    arms; experiments that leave 'U' are not evaluated again. One line is
    written per peek, and a final summary (type 1 error rate when
    ``muA == muB``, power otherwise) is written when ``pr_peeking`` is true.

    Args:
        muA, muB: Means handed to ``helper.sim_binomial_seq``.
        sample_size: Maximum number of samples per arm.
        n_experiments: Number of simulated experiments.
        n_peeks: Number of peeks, or -1 to peek at every sample.
        start: First peek: a float in (0, 1) is a fraction of the sample
            size returned by ``helper.sample_size``; an int in
            [1, sample_size] is an absolute count; None derives it from
            ``n_peeks``.
        relative_mde_value: Forwarded unchanged to ``calc_stat``.
        toc_adj_factor: Forwarded unchanged to ``calc_stat``.
        pr_peeking: Gates the final summary line only.
            NOTE(review): the per-peek lines are printed unconditionally,
            unlike the ROPE variant where this flag gates them - confirm
            which behaviour is intended.
        pr_fixed: When true, also run ``sim_fixed`` on the same data.
        fn: Forwarded to ``helper.fprint`` as its second argument.

    Returns:
        dict with ``'sim_res_df'`` (one row per peek), ``'el_res'`` and
        ``'el_lift'``.
    """

    def _mean(values):
        # np.mean([]) yields nan *and* a RuntimeWarning; return nan quietly.
        return np.mean(values) if len(values) else float('nan')

    def _mean_abs(values):
        return np.mean(np.abs(values)) if len(values) else float('nan')

    start_time = time.time()
    helper.fprint(
        "[Start time]: {}".format(
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())), fn)

    ss = helper.sample_size(muA,
                            mde_value=relative_mde_value,
                            relative_mde=True)
    freq_sample_size = ss['variant_sample_size']

    if isinstance(start, float):
        assert (0 < start and start < 1)
        start = int(freq_sample_size * start)
        step = 1 if n_peeks == -1 else int((sample_size - start) / (n_peeks - 1))
    elif isinstance(start, int):
        assert (1 <= start and start <= sample_size)
        step = 1 if n_peeks == -1 else int((sample_size - start) / (n_peeks - 1))
    elif n_peeks == -1:
        start = 1
        step = 1
    else:
        start = int(sample_size / n_peeks)
        step = start

    # The same banner is printed before and after the simulation.
    param_line = (
        "[Parameters]: muA: {}, muB: {}, sample_size: {:,}, n_experiments: {:,},"
        " relative_mde_value: {}, toc_adj_factor: {:.5f}, n_peeks: {:,}, start: {:,}, step: {:,}"
        .format(muA, muB, sample_size, n_experiments, relative_mde_value,
                toc_adj_factor, n_peeks, start, step))
    helper.fprint(param_line, fn)

    sim_data = helper.sim_binomial_seq([muA, muB],
                                       sample_size=sample_size,
                                       n_experiments=n_experiments)

    sim_res_data = []
    el_res = ['U'] * n_experiments
    el_ss = [0] * n_experiments         # sample count at the last evaluation
    el_lift = [0] * n_experiments

    peek_idx = 0
    for n in range(start, sample_size + 1, step):
        peek_idx += 1
        peek_lift = []
        for s in range(n_experiments):
            if el_res[s] != 'U':
                continue  # already concluded at an earlier peek
            stat = el_core.calc_stat(sim_data[s][0][n - 1], n,
                                     sim_data[s][1][n - 1], n,
                                     relative_mde_value, toc_adj_factor)
            el_res[s] = stat['el_res']
            el_ss[s] = n
            el_lift[s] = stat['lift']
            if stat['el_res'] != 'U':
                peek_lift.append(stat['lift'])

        res = pd.Series(el_res).value_counts().to_dict()
        # Often empty (no experiment concluded at this peek) -> quiet nan.
        peek_avg_lift = _mean_abs(peek_lift)

        sim_res_data_dict = {}
        sim_res_data_dict.update(res)
        sim_res_data_dict['peek_id'] = peek_idx
        sim_res_data_dict['samples'] = n
        sim_res_data_dict['avg_lift'] = peek_avg_lift
        sim_res_data.append(sim_res_data_dict)

        helper.fprint(
            "[{}]: Peek #{:,} @ {:,} samples, avg_lift: {:.5f}, el_res: {}"
            .format(time.strftime("%m-%d %H:%M:%S", time.localtime()),
                    peek_idx, n, peek_avg_lift, res), fn)

        if 'U' not in el_res:
            break

    helper.fprint(param_line, fn)

    # Recomputed so the summary does not depend on a loop-local variable
    # (which would be undefined if the peek range were empty).
    res = pd.Series(el_res).value_counts().to_dict()

    if muA == muB:
        # No true difference: any declared winner is a false positive.
        winner_lift = [el_lift[i] for i, r in enumerate(el_res)
                       if r in ('A', 'B')]
        if pr_peeking:
            helper.fprint(
                "[EL] {:,} peeks - Type 1 error rate: {:.4f}, error_avg_lift: {:.5f}, el_res: {}"
                .format(
                    peek_idx,
                    1 - (res.get('U', 0) + res.get('E', 0)) / n_experiments,
                    _mean_abs(winner_lift), res), fn)
    else:
        winner = 'A' if muA > muB else 'B'
        hits = [i for i, r in enumerate(el_res) if r == winner]
        winner_ss = [el_ss[i] for i in hits]
        winner_lift = [el_lift[i] for i in hits]
        if pr_peeking:
            avg_ss = _mean(winner_ss)
            helper.fprint(
                "[EL] {:,} peeks - Power: {:.4f}, avg_sample_size: {:,.0f},"
                "sample_size_ratio: {:.2f}%, avg_lift: {:.5f}, el_res: {}"
                .format(peek_idx, res.get(winner, 0) / n_experiments, avg_ss,
                        avg_ss / freq_sample_size * 100,
                        _mean_abs(winner_lift), res), fn)

    if pr_fixed:
        # Forward toc_adj_factor and fn as well: without them the fixed
        # comparison silently used the default factor and did not log to fn.
        sim_fixed(muA, muB, sample_size, n_experiments, sim_data,
                  relative_mde_value, toc_adj_factor, fn)

    helper.fprint(
        "[Elasped time]: {}".format(
            str(timedelta(seconds=time.time() - start_time))), fn)
    helper.fprint(
        "[End time]: {}".format(
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())), fn)

    return {
        'sim_res_df': pd.DataFrame(sim_res_data),
        'el_res': el_res,
        'el_lift': el_lift
    }