def SPRT_elo(R, alpha=0.05, beta=0.05, p=0.05, elo0=None, elo1=None, elo_model=None):
    """Calculate an elo estimate from an SPRT test.

    R         -- dict with 'wins', 'losses', 'draws' and optionally
                 'pentanomial' (pentanomial frequencies).
    alpha     -- max type I error of the SPRT.
    beta      -- max type II error of the SPRT.
    p         -- presumably 1-p is the confidence level passed to
                 sprt.analytics() -- confirm against sprt module.
    elo0/elo1 -- the SPRT hypotheses, expressed in elo_model units.
    elo_model -- 'BayesElo' or 'logistic'.

    Returns the dict produced by sprt.analytics(p), with its 'LLR' entry
    replaced by the exact logistic LLR and the internal 'clamped'
    diagnostic removed.
    """
    assert elo_model in ["BayesElo", "logistic"]
    # Estimate drawelo out of sample from the trinomial frequencies.
    R3 = LLRcalc.regularize([R["losses"], R["draws"], R["wins"]])
    drawelo = draw_elo_calc(R3)
    # Convert the bounds to logistic elo if necessary.
    if elo_model == "BayesElo":
        lelo0, lelo1 = [bayeselo_to_elo(elo_, drawelo) for elo_ in (elo0, elo1)]
    else:
        lelo0, lelo1 = elo0, elo1
    # Make the elo estimation object.
    sp = sprt.sprt(alpha=alpha, beta=beta, elo0=lelo0, elo1=lelo1)
    # Feed the results, preferring pentanomial (game-pair) data when
    # available -- same idiom as update_SPRT.
    R_ = R.get("pentanomial", R3)
    sp.set_state(R_)
    # Get the elo estimates.
    a = sp.analytics(p)
    # Override the LLR approximation with the exact one.
    a["LLR"] = LLRcalc.LLR_logistic(lelo0, lelo1, R_)
    # 'clamped' is an internal diagnostic; drop it from the result
    # (pop() so a missing key is not an error).
    a.pop("clamped", None)
    # Now return the estimates.
    return a
def update_SPRT(R, sprt):
    """Run one step of a Sequential Probability Ratio Test.

    ``sprt`` is a dict with fixed fields 'elo0', 'alpha', 'elo1', 'beta',
    'elo_model', 'lower_bound', 'upper_bound' and mutable fields 'llr',
    'state', 'overshoot', which this function refreshes in place.

    Hypotheses: H0: elo = elo0, H1: elo = elo1, with alpha the max type I
    error (reached at elo = elo0) and beta the max type II error for
    elo >= elo1 (reached at elo = elo1).

    'overshoot' carries the statistics for dynamic overshoot estimation.
    The theoretical basis is Siegmund - Sequential Analysis -
    Corollary 8.33; correctness can be verified by simulation, see
    https://github.com/vdbergh/simul

    ``R`` supplies R['wins'], R['losses'], R['draws'] and optionally
    R['pentanomial'] (the pentanomial frequencies). elo_model is either
    'BayesElo' or 'logistic'.

    Normally called after each finished game (trinomial) or game pair
    (pentanomial), but repeated calls with the same data and skipped
    updates are both handled sensibly.
    """
    # Recomputing the fixed bounds is redundant, but old tests rely on it
    # (backward compatibility).
    alpha, beta = sprt['alpha'], sprt['beta']
    sprt['lower_bound'] = math.log(beta / (1 - alpha))
    sprt['upper_bound'] = math.log((1 - beta) / alpha)

    model = sprt.get('elo_model', 'BayesElo')
    assert model in ['BayesElo', 'logistic']

    # Legacy BayesElo/trinomial handling: translate the bounds to logistic elo.
    trinomial = LLRcalc.regularize([R['losses'], R['draws'], R['wins']])
    if model == 'BayesElo':
        # drawelo is estimated out of sample.
        drawelo = draw_elo_calc(trinomial)
        lelo0 = bayeselo_to_elo(sprt['elo0'], drawelo)
        lelo1 = bayeselo_to_elo(sprt['elo1'], drawelo)
    else:
        lelo0 = sprt['elo0']
        lelo1 = sprt['elo1']

    # Log-likelihood ratio, on pentanomial data when available.
    results = R.get('pentanomial', trinomial)
    llr = LLRcalc.LLR_logistic(lelo0, lelo1, results)
    sprt['llr'] = llr

    # Refresh the overshoot statistics.
    if 'overshoot' in sprt:
        o = sprt['overshoot']
        num_samples = sum(results)
        last = o['last_update']
        if num_samples < last:
            # Sample count went backwards (purge?): the contract is violated.
            sprt['lost_samples'] = last - num_samples  # audit
            del sprt['overshoot']
        else:
            if num_samples == last + 1:
                # The normal case: exactly one new game (pair).
                # Track record lows/highs of the LLR and accumulate the
                # increments and their squares on each side.
                if llr < o['ref0']:
                    step = llr - o['ref0']
                    o['m0'] += step
                    o['sq0'] += step * step
                    o['ref0'] = llr
                if llr > o['ref1']:
                    step = llr - o['ref1']
                    o['m1'] += step
                    o['sq1'] += step * step
                    o['ref1'] = llr
            elif num_samples != last:
                # Some updates were lost: reset data collection.
                # Should not be needed anymore, but just in case...
                o['ref0'] = llr
                o['ref1'] = llr
                o['skipped_updates'] += (num_samples - last) - 1  # audit
            # num_samples == last means a repeated call with the same data:
            # nothing to record beyond refreshing last_update.
            o['last_update'] = num_samples

    # Overshoot corrections for the two stopping bounds.
    o0 = o1 = 0
    if 'overshoot' in sprt:
        o = sprt['overshoot']
        if o['m0'] != 0:
            o0 = -o['sq0'] / o['m0'] / 2
        if o['m1'] != 0:
            o1 = o['sq1'] / o['m1'] / 2

    # Stop condition.
    if llr < sprt['lower_bound'] + o0:
        sprt['state'] = 'rejected'
    elif llr > sprt['upper_bound'] - o1:
        sprt['state'] = 'accepted'
    else:
        sprt['state'] = ''
def update_SPRT(R, sprt): """Sequential Probability Ratio Test sprt is a dictionary with fixed fields 'elo0', 'alpha', 'elo1', 'beta', 'elo_model', 'lower_bound', 'upper_bound', 'batch_size' It also has the following fields 'llr', 'state', 'overshoot' which are updated by this function. Normally this function should be called each time 'batch_size' games (trinomial) or game pairs (pentanomial) have been completed but it is safe to call it multiple times with the same parameters. The main purpose of this is to be able to recalculate the LLR for old tests. In the unlikely event of a server crash it is possible that some updates may be missed but this situation is also handled sensibly. The meaning of the other inputs and the fields is as follows. H0: elo = elo0 H1: elo = elo1 alpha = max typeI error (reached on elo = elo0) beta = max typeII error for elo >= elo1 (reached on elo = elo1) 'overshoot' is a dictionary with data for dynamic overshoot estimation. The theoretical basis for this is: Siegmund - Sequential Analysis - Corollary 8.33. 
The correctness can be verified by simulation https://github.com/vdbergh/simul R['wins'], R['losses'], R['draws'] contains the number of wins, losses and draws R['pentanomial'] contains the pentanomial frequencies elo_model can be either 'BayesElo', 'logistic' or 'normalized'""" # the next two lines are superfluous, but unfortunately necessary for backward # compatibility with old tests sprt["lower_bound"] = math.log(sprt["beta"] / (1 - sprt["alpha"])) sprt["upper_bound"] = math.log((1 - sprt["beta"]) / sprt["alpha"]) elo_model = sprt.get("elo_model", "BayesElo") assert elo_model in ["BayesElo", "logistic", "normalized"] elo0 = sprt["elo0"] elo1 = sprt["elo1"] # first deal with the legacy BayesElo/trinomial models R3 = [R.get("losses", 0), R.get("draws", 0), R.get("wins", 0)] if elo_model == "BayesElo": # estimate drawelo out of sample R3_ = LLRcalc.regularize(R3) drawelo = draw_elo_calc(R3_) # conversion of bounds to logistic elo elo0, elo1 = [bayeselo_to_elo(elo, drawelo) for elo in (elo0, elo1)] elo_model = "logistic" R_ = R.get("pentanomial", R3) batch_size = sprt.get("batch_size", 1) # sanity check on batch_size if sum(R_) % batch_size != 0: sprt["illegal_update"] = sum(R_) # audit if "overshoot" in sprt: del sprt["overshoot"] # the contract is violated # Log-Likelihood Ratio assert elo_model in ["logistic", "normalized"] if elo_model == "logistic": sprt["llr"] = LLRcalc.LLR_logistic(elo0, elo1, R_) else: sprt["llr"] = LLRcalc.LLR_normalized(elo0, elo1, R_) # update the overshoot data if "overshoot" in sprt: LLR_ = sprt["llr"] o = sprt["overshoot"] num_samples = sum(R_) if num_samples < o["last_update"]: # purge? 
sprt["lost_samples"] = o["last_update"] - num_samples # audit del sprt["overshoot"] # the contract is violated else: if num_samples == o["last_update"]: # same data pass elif num_samples == o[ "last_update"] + batch_size: # the normal case if LLR_ < o["ref0"]: delta = LLR_ - o["ref0"] o["m0"] += delta o["sq0"] += delta**2 o["ref0"] = LLR_ if LLR_ > o["ref1"]: delta = LLR_ - o["ref1"] o["m1"] += delta o["sq1"] += delta**2 o["ref1"] = LLR_ else: # Be robust if some updates are lost: reset data collection. # This should not be needed anymore, but just in case... o["ref0"] = LLR_ o["ref1"] = LLR_ o["skipped_updates"] += (num_samples - o["last_update"]) - 1 # audit o["last_update"] = num_samples o0 = 0 o1 = 0 if "overshoot" in sprt: o = sprt["overshoot"] o0 = -o["sq0"] / o["m0"] / 2 if o["m0"] != 0 else 0 o1 = o["sq1"] / o["m1"] / 2 if o["m1"] != 0 else 0 # now check the stop condition sprt["state"] = "" if sprt["llr"] < sprt["lower_bound"] + o0: sprt["state"] = "rejected" elif sprt["llr"] > sprt["upper_bound"] - o1: sprt["state"] = "accepted"