示例#1
0
def compute_risk(e, mid, sn_tcpra, trials=None):
    """
    Compute (estimate) Bayesian risk (chance that reported
    outcome is wrong for contest e.cid_m[mid]).

    sn_tcpra is sampled number: stage_time->cid->pbcid->rvote->avote->count
    We take sn_tcpra here as argument rather than just use e.sn_tcpra so
    we can call compute_contest_risk with modified sample counts.
    (This option not yet used, but might be later, when optimizing
    workload.)
    Here sn_tcpra is identical in structure to (and may in fact be
    identical to) e.sn_tcpra.

    Here trials is the number of trials to run to obtain the desired
    precision in the risk estimate; when it is None, e.n_trials is used.

    This method is the heart of the Bayesian post-election audit method.
    But it could be replaced by a frequentist approach instead, at
    least for those outcome rules and mixes of collection types for
    which a frequentist method is known.

    The comparison and ballot-polling audits are blended here; the
    reported election data just records a ("-noCVR",) vote for the
    reported vote in a noCVR paper ballot collection.

    This means that ballot-polling audits have a prior of pseudocount_base,
    while comparison audits have a prior of pseudocount_base for off-diagonal
    (non-equal reported and actual) vote pairs, but a prior of pseudocount_match
    for equal reported-vote and actual-vote pairs.

    Returns the fraction of trials whose simulated outcome differs from
    e.ro_c[cid]; the same value is also stored in
    e.risk_tm[e.stage_time][mid].

    Raises ValueError if the effective number of trials is not positive,
    since no risk estimate can be formed from zero trials.
    """

    cid = e.cid_m[mid]
    if trials is None:
        trials = e.n_trials
    if trials <= 0:
        raise ValueError(
            "trials must be a positive integer, got {!r}".format(trials))

    votes = e.votes_c[cid]
    wrong_outcome_count = 0
    for _ in range(trials):
        test_tally = {vote: 0 for vote in votes}
        # Sorted iteration keeps the sequence of random draws reproducible
        # for a given random-number-generator state.
        for pbcid in sorted(e.possible_pbcid_c[cid]):
            # Draw from posterior for each paper ballot collection, sum them.
            # Stratify by reported vote.
            sampled_ra = sn_tcpra[e.stage_time][cid][pbcid]
            for rv in sorted(sampled_ra):
                sampled_a = sampled_ra[rv]
                # Work on a copy so the caller's sample counts are untouched.
                tally = sampled_a.copy()
                for av in votes:
                    prior = (e.pseudocount_match
                             if av == rv else e.pseudocount_base)
                    tally[av] = tally.get(av, 0) + prior
                dirichlet_dict = dirichlet(tally)
                stratum_size = e.rn_cpr[cid][pbcid][rv]
                # Sample size comes from the raw counts, not from tally,
                # which by now also contains the prior pseudocounts.
                sample_size = sum(sampled_a.values())
                nonsample_size = stratum_size - sample_size
                for av in sorted(tally):
                    # Note: tally[av] still includes its pseudocount here.
                    test_tally[av] += tally[av]
                    test_tally[av] += dirichlet_dict[av] * nonsample_size
        if e.ro_c[cid] != outcomes.compute_outcome(e, cid, test_tally):
            wrong_outcome_count += 1

    # Normalise by the number of trials actually run; dividing by
    # e.n_trials would mis-scale the estimate whenever the caller
    # supplies its own trials value.
    risk = wrong_outcome_count / trials
    e.risk_tm[e.stage_time][mid] = risk
    return risk
示例#2
0
def get_noisy_guess(e,
                    mid,
                    pbcids,
                    actual_votes,
                    xs,
                    nonsample_sizes,
                    num_trials=100):
    """
    Use Dirichlet a certain number of times, to measure how often a winner
    that isn't the reported winner wins in the overall election.

    Each trial deep-copies actual_votes (pbcid -> av -> count), replaces
    zero counts by a pseudocount of 50, extends every collection by
    xs[pbcid] simulated ballots and then by nonsample_sizes[pbcid] -
    xs[pbcid] more, merges the collections listed in pbcids into a single
    tally, and compares outcomes.compute_outcome on that tally with
    e.ro_c[cid].

    Returns abs(misses / matches - 0.05), where matches is the number of
    trials that reproduced the reported outcome and misses is
    num_trials - matches. If no trial reproduced the reported outcome
    (including num_trials == 0) the ratio is undefined and float("inf")
    is returned instead of raising ZeroDivisionError.

    NOTE(review): the ratio is taken against the number of matching trials
    rather than against num_trials, and 0.05 is hard-coded; both are kept
    as found -- confirm they are intended.
    """
    cid = e.cid_m[mid]
    num_matches = 0
    for _ in range(num_trials):
        current_sample = copy.deepcopy(actual_votes)

        # First extension: xs[pbcid] additional simulated ballots.
        for pbcid in actual_votes:
            counts = current_sample[pbcid]
            for av in counts:
                if counts[av] == 0:
                    counts[av] += 50  # pseudocount
            dirichlet_dict = risk_bayes.dirichlet(counts)
            extended_sample = risk_bayes.multinomial(xs[pbcid], dirichlet_dict)
            for av in counts:
                counts[av] += extended_sample[av]

        # Second extension: the remaining nonsample_sizes[pbcid] - xs[pbcid].
        for pbcid in actual_votes:
            counts = current_sample[pbcid]
            dirichlet_dict = risk_bayes.dirichlet(counts)
            extended_sample = risk_bayes.multinomial(
                nonsample_sizes[pbcid] - xs[pbcid], dirichlet_dict)
            for av in counts:
                counts[av] += extended_sample[av]

        # Sum the per-collection tallies into one contest-wide tally.
        merged_sample = {}
        for pbcid in pbcids:
            for av, count in current_sample[pbcid].items():
                merged_sample[av] = merged_sample.get(av, 0) + count

        if outcomes.compute_outcome(e, cid, merged_sample) == e.ro_c[cid]:
            num_matches += 1

    if num_matches == 0:
        # Every trial (if any) contradicted the reported outcome.
        return float("inf")
    return abs(float(num_trials - num_matches) / num_matches - 0.05)
示例#3
0
def get_sample_size(e,
                    pbcids_to_adjust,
                    init_x=1,
                    pick_pbcid_func=round_robin):
    """
    Get sample size, for a given county, given how many ballots have been sampled before, and the number left
    to audit, as well as the required risk limit.

    For every mid in e.cid_m the helper dicts xs, actual_votes and
    nonsample_sizes are built with create_helper_dicts, then e.max_num_it
    simulation rounds are run. Each round picks one paper ballot
    collection with pick_pbcid_func, simulates extended samples, and passes
    that chosen collection to update_correct or update_incorrect depending
    on whether the simulated outcome matches e.ro_c[cid].

    pick_pbcid_func is called as f(pbcids, actual_votes, xs,
    nonsample_sizes) when it is random_min_var, as f(pbcids, start) when it
    is round_robin, and as f(pbcids) otherwise.

    Returns the xs dict produced for the last mid iterated, or an empty
    dict when e.cid_m is empty.
    NOTE(review): xs is rebuilt for every mid, so results for earlier
    mids are discarded -- confirm that this is intended.
    """
    default_start_pbcid = 0
    start = None
    num_winners = e.num_winners
    max_num_it = e.max_num_it
    xs = {}  # returned unchanged when e.cid_m has no entries
    for mid in e.cid_m:
        cid = e.cid_m[mid]

        xs, actual_votes, nonsample_sizes = create_helper_dicts(
            e, mid, init_x, pbcids_to_adjust)

        # For max_num_it iterations, we first choose a county, then, we extend the county
        # by x. Then, given this extended sample, we use it to extend the entire contest to
        # n votes. We calculate the winner of the extended contest - if all the winners are
        # correct, then we update the x for that pbcid, by possibly decreasing it. If not,
        # with some probability, we increase x for that county.
        for _ in range(max_num_it):
            current_sample = copy.deepcopy(
                actual_votes)  # pbcid -> av -> count
            if pick_pbcid_func is random_min_var:
                chosen_pbcid = pick_pbcid_func(pbcids_to_adjust, actual_votes,
                                               xs, nonsample_sizes)
            elif pick_pbcid_func is round_robin:
                if start is None:
                    start = default_start_pbcid
                chosen_pbcid = pick_pbcid_func(pbcids_to_adjust, start)
                # The round-robin position carries over between rounds
                # (and between mids).
                start = (start + 1) % len(pbcids_to_adjust)
            else:
                chosen_pbcid = pick_pbcid_func(pbcids_to_adjust)

            # Extend only the chosen collection by its current x.
            chosen_counts = current_sample[chosen_pbcid]
            for av in chosen_counts:
                if chosen_counts[av] == 0:
                    chosen_counts[av] += 50  # pseudocount
            dirichlet_dict = risk_bayes.dirichlet(chosen_counts)
            extended_sample = risk_bayes.multinomial(xs[chosen_pbcid],
                                                     dirichlet_dict)
            for av in chosen_counts:
                chosen_counts[av] += extended_sample[av]

            # Extend every collection by nonsample_sizes - xs. These loops
            # use their own variable so that chosen_pbcid is not overwritten
            # before the update step below.
            for other_pbcid in pbcids_to_adjust:
                counts = current_sample[other_pbcid]
                dirichlet_dict = risk_bayes.dirichlet(counts)
                extended_sample = risk_bayes.multinomial(
                    nonsample_sizes[other_pbcid] - xs[other_pbcid],
                    dirichlet_dict)
                for av in counts:
                    counts[av] += extended_sample[av]

            # Sum the per-collection tallies into one contest-wide tally.
            merged_sample = {}
            for other_pbcid in pbcids_to_adjust:
                for av, count in current_sample[other_pbcid].items():
                    merged_sample[av] = merged_sample.get(av, 0) + count

            # The outcome is computed num_winners times on the same tally
            # and all results must agree with the reported outcome.
            winners = [
                outcomes.compute_outcome(e, cid, merged_sample)
                for _ in range(num_winners)
            ]
            if len(set(winners)) == 1 and winners[0] == e.ro_c[cid]:
                xs = update_correct(xs, [chosen_pbcid], nonsample_sizes,
                                    num_winners, e.risk_limit_m[mid])
            else:
                xs = update_incorrect(xs, [chosen_pbcid], nonsample_sizes,
                                      num_winners, e.risk_limit_m[mid])
    return xs