def test_apply_rules(param_dict):
    """Check the state of a ``BallotMarks`` object after ``apply_rules``.

    ``param_dict['input']`` supplies the starting ``marks`` and the ``rules``
    keyword arguments; ``param_dict['expected']`` supplies the ``marks``,
    ``unique_marks`` and ``unique_candidates`` the object must end up with.
    """
    given = param_dict['input']

    ballot = BallotMarks(given['marks'])
    ballot.apply_rules(**given['rules'])

    expected = param_dict['expected']
    for attr in ('marks', 'unique_marks', 'unique_candidates'):
        assert getattr(ballot, attr) == expected[attr]
def test_combine_writein_marks(param_dict):
    """Compare the result of ``BallotMarks.combine_writein_marks`` with the expected state.

    ``param_dict['input']`` is the mark list used to build the ballot;
    ``param_dict['expected']`` holds the ``marks``, ``unique_marks`` and
    ``unique_candidates`` the returned object must expose.
    """
    combined = BallotMarks.combine_writein_marks(BallotMarks(param_dict['input']))
    expected = param_dict['expected']

    assert combined.marks == expected['marks']
    assert combined.unique_marks == expected['unique_marks']
    assert combined.unique_candidates == expected['unique_candidates']
def test_remove_duplicate_marks(param_dict):
    """Compare the result of ``BallotMarks.remove_duplicate_candidate_marks`` with the expected state.

    ``param_dict['input']`` is the mark list used to build the ballot;
    ``param_dict['expected']`` holds the ``marks``, ``unique_marks`` and
    ``unique_candidates`` the returned object must expose.
    """
    deduplicated = BallotMarks.remove_duplicate_candidate_marks(BallotMarks(param_dict['input']))
    expected = param_dict['expected']

    assert deduplicated.marks == expected['marks']
    assert deduplicated.unique_marks == expected['unique_marks']
    assert deduplicated.unique_candidates == expected['unique_candidates']
def test_update_marks():
    """``update_marks`` on an empty ``BallotMarks`` must store the marks and both unique sets."""
    overvote, skipped = BallotMarks.OVERVOTE, BallotMarks.SKIPPED
    given_marks = ['A', 'B', 'B', overvote, skipped]

    ballot = BallotMarks()
    ballot.update_marks(given_marks)

    assert ballot.marks == given_marks
    # The repeated 'B' collapses in the unique views; only 'A' and 'B' count as candidates.
    assert ballot.unique_marks == {'A', 'B', overvote, skipped}
    assert ballot.unique_candidates == {'A', 'B'}
Пример #5
0
    def _make_candidate_set(self, rule_set_name: str) -> None:
        """Build the candidate set for a registered rule set and cache it.

        The unique candidates of every ballot in the parsed CVR are unioned into a
        single ``BallotMarks`` object, the rule set's two write-in rules are applied
        to it, and the result is stored in ``self._candidate_sets`` under
        ``rule_set_name``.

        Raises:
            RuntimeError: if ``rule_set_name`` is not a key of ``self._rule_sets``.
        """
        if rule_set_name not in self._rule_sets:
            raise RuntimeError(f'rule set {rule_set_name} has not yet been added using add_rule_set().')

        ballots = self._parsed_cvr['ballot_marks']

        # Only the write-in rules are relevant when building a candidate set.
        rules = self._rule_sets[rule_set_name]
        writein_rules = {
            'combine_writein_marks': rules['combine_writein_marks'],
            'exclude_writein_marks': rules['exclude_writein_marks'],
        }

        pooled_candidates = BallotMarks(set.union(*(ballot.unique_candidates for ballot in ballots)))
        pooled_candidates.apply_rules(**writein_rules)

        self._candidate_sets[rule_set_name] = pooled_candidates
Пример #6
0
    def _clean_round(self) -> None:
        """
        Remove any newly inactivated candidates from the ballot ranks. But only remove previous round winners
        from specified ballots.

        Candidates in ``self._inactive_candidates`` that are not yet in ``self._removed_candidates`` are
        handled in two passes: non-winners are removed from every ballot right away, while candidates found
        in ``self._round_winners`` are removed afterwards and only from the ballots flagged by
        ``self._removal_ballots``. Every handled candidate is appended to ``self._removed_candidates``.
        """

        def without(ballot, candidate):
            # Rebuild the ballot record with `candidate` removed from its marks.
            return {
                'ballot_marks': BallotMarks.remove_mark(ballot['ballot_marks'], [candidate]),
                'weight': ballot['weight'],
                'weight_distrib': ballot['weight_distrib']
            }

        winners = []
        for inactive_cand in self._inactive_candidates:
            if inactive_cand in self._removed_candidates:
                continue

            if inactive_cand in self._round_winners:
                # winners are deferred to the second pass below
                winners.append(inactive_cand)
            else:
                self._contest_cvr_ld = [without(b, inactive_cand) for b in self._contest_cvr_ld]
            self._removed_candidates.append(inactive_cand)

        # remove all other candidates before winners. This mostly matters in the first round when all
        # zero-vote candidates are removed. That is the only time a loser and a winner might both be inactivated in the
        # same round. It may also happen in the last round, but transfer calculations at that point have no impact.
        # The removal flags for every winner are computed before any winner is taken off a ballot.
        remove_bool_lists = [self._removal_ballots(winner, default_as_true=True) for winner in winners]

        for winner, to_remove in zip(winners, remove_bool_lists):
            # Remove `winner` itself. The loop variable left over from the first pass must not be used here:
            # it names whichever inactive candidate happened to be iterated last.
            self._contest_cvr_ld = [
                without(b, winner) if is_remove else b
                for b, is_remove in zip(self._contest_cvr_ld, to_remove)
            ]
Пример #7
0
    def __init__(self,
                 jurisdiction: str = "",
                 state: str = "",
                 year: str = "",
                 date: str = "",
                 office: str = "",
                 notes: str = "",
                 parser_func: Optional[Callable] = None,
                 parser_args: Optional[Dict] = None,
                 parsed_cvr: Optional[Dict] = None,
                 split_fields: Optional[List] = None) -> None:
        """Store the identifying fields, load the cast vote record and compute its statistics.

        Args:
            jurisdiction, state, year, date, office, notes: identifying strings, stored as
                attributes and copied into the one-row ``self._id_df`` table.
            parser_func, parser_args, parsed_cvr: forwarded unchanged to
                ``self._prepare_parsed_cvr``, whose result becomes ``self._parsed_cvr``.
            split_fields: stored as ``self.split_fields``.
        """
        # ID INFO
        self.jurisdiction = jurisdiction
        self.state = state
        self.date = date
        self.year = year
        self.office = office
        self.notes = notes
        self.split_fields = split_fields
        # computed last, after the other identifying attributes are in place
        self.unique_id = self._unique_id()

        id_columns = ('jurisdiction', 'state', 'date', 'year', 'office', 'notes', 'unique_id')
        self._id_df = pd.DataFrame({column: [getattr(self, column)] for column in id_columns})

        # CVR AND RULE SETS
        self._parsed_cvr = self._prepare_parsed_cvr(
            parser_func=parser_func, parser_args=parser_args, parsed_cvr=parsed_cvr)
        self._modified_cvrs = {}
        self._candidate_sets = {}
        self._rule_sets = {}

        # the default rule set is registered with the defaults of BallotMarks.new_rule_set()
        self._default_rule_set_name = '__cvr'
        default_rules = BallotMarks.new_rule_set()
        self.add_rule_set(self._default_rule_set_name, default_rules)

        # STAT INFO
        # each table attribute is initialised to None and then filled by its compute method
        self._cvr_stat_table = None
        self._compute_cvr_stat_table()

        self._summary_cvr_stat_table = None
        self._compute_summary_cvr_stat_table()

        self._split_filter_dict = {}
        self._summary_cvr_split_stat_table = None
Пример #8
0
 def _clean_round(self) -> None:
     """
     Strip every newly inactivated candidate from the ballot ranks.

     A candidate in ``self._inactive_candidates`` is processed only if it is not yet in
     ``self._removed_candidates``; it is appended to that list once its marks are removed.
     """
     for candidate in self._inactive_candidates:
         if candidate in self._removed_candidates:
             continue

         # Rebuild each ballot record with this candidate's marks removed.
         stripped_ballots = []
         for ballot in self._contest_cvr_ld:
             stripped_ballots.append({
                 'ballot_marks': BallotMarks.remove_mark(ballot['ballot_marks'], [candidate]),
                 'weight': ballot['weight'],
                 'weight_distrib': ballot['weight_distrib'],
             })

         self._contest_cvr_ld = stripped_ballots
         self._removed_candidates.append(candidate)
Пример #9
0
    def _prepare_parsed_cvr(self,
                            parser_func: Optional[Callable] = None,
                            parser_args: Optional[Dict] = None,
                            parsed_cvr: Optional[Dict[str, List]] = None) -> Dict[str, List]:
        """Obtain a parsed CVR, validate it and convert its rank lists to ``BallotMarks``.

        Args:
            parser_func: callable producing the parsed CVR; only used when ``parser_args``
                is also truthy, in which case it is called as ``parser_func(**parser_args)``.
            parser_args: keyword arguments for ``parser_func``.
            parsed_cvr: an already parsed CVR, used when no parser is run. A list is treated
                as the rank lists themselves. A dict passed here is not modified.

        Returns:
            A new dict in which the ``'ranks'`` field is replaced by ``'ballot_marks'`` (one
            ``BallotMarks`` per ballot) and ``'weight'`` holds ``decimal.Decimal`` values.

        Raises:
            ValueError: if neither the parser nor ``parsed_cvr`` yields a non-empty value.
            RuntimeError: if ``'ranks'`` is missing or empty, if ballots have rank lists of
                unequal length, or if the fields have unequal lengths.
        """
        if parser_func and parser_args:
            parsed_cvr = parser_func(**parser_args)

        if not parsed_cvr:
            raise ValueError('if no parser_func and parser_args are passed, a parsed_cvr must be passed.')

        # if parser returns a list, assume it is rank list of lists
        if isinstance(parsed_cvr, list):
            parsed_cvr = {
                'ranks': parsed_cvr,
                'weight': [decimal.Decimal('1')] * len(parsed_cvr)
            }

        if 'ranks' not in parsed_cvr:
            raise RuntimeError('Parsed CVR does not contain field "ranks"')

        if len(parsed_cvr['ranks']) == 0:
            raise RuntimeError('parsed ranks list is empty.')

        # Normalise a shallow copy: the steps below add and remove keys, and doing that on the
        # caller's dict would strip its 'ranks' field and make it unusable for a second call.
        parsed_cvr = dict(parsed_cvr)

        if 'weight' not in parsed_cvr:
            parsed_cvr['weight'] = [decimal.Decimal('1') for _ in parsed_cvr['ranks']]

        # only the first weight is inspected to decide whether conversion is needed
        if not isinstance(parsed_cvr['weight'][0], decimal.Decimal):
            parsed_cvr['weight'] = [decimal.Decimal(str(i)) for i in parsed_cvr['weight']]

        parsed_cvr['ballot_marks'] = [BallotMarks(ranks) for ranks in parsed_cvr['ranks']]
        del parsed_cvr['ranks']

        ballot_lengths = collections.Counter(len(b.marks) for b in parsed_cvr['ballot_marks'])
        if len(ballot_lengths) > 1:
            raise RuntimeError(f'Parsed CVR contains ballots with unequal length rank lists. {str(ballot_lengths)}')

        field_lengths = {k: len(parsed_cvr[k]) for k in parsed_cvr}
        if len(set(field_lengths.values())) > 1:
            raise RuntimeError(f'Parsed CVR contains fields of unequal length. {str(field_lengths)}')

        return parsed_cvr
def test_remove_duplicate_marks_errors(error_type, input):
    """``BallotMarks.remove_duplicate_candidate_marks(input)`` must raise ``error_type``."""
    remove_duplicates = BallotMarks.remove_duplicate_candidate_marks

    with pytest.raises(error_type):
        remove_duplicates(input)
def test_combine_writein_marks_errors(error_type, input):
    """``BallotMarks.combine_writein_marks(input)`` must raise ``error_type``."""
    combine_writeins = BallotMarks.combine_writein_marks

    with pytest.raises(error_type):
        combine_writeins(input)
def test_check_writein_match(param_dict):
    """Compare ``BallotMarks.check_writein_match(param_dict['input'])`` with ``param_dict['expected']``."""
    result = BallotMarks.check_writein_match(param_dict['input'])
    wanted = param_dict['expected']

    assert wanted == result

    # NOTE(review): the checks below index into the same ``expected`` value that was just
    # compared with the result as a whole, and read mark attributes off the result; confirm
    # that both groups of assertions are really meant to belong to this test.
    assert result.marks == wanted['marks']
    assert result.unique_marks == wanted['unique_marks']
    assert result.unique_candidates == wanted['unique_candidates']


param_dicts = [
    ({
        'input': {
            'marks': [
                'A', 'B', 'B', 'writein10', 'Tuwi', BallotMarks.WRITEIN, 'uwi',
                BallotMarks.OVERVOTE
            ],
            'rules':
            BallotMarks.new_rule_set()
        },
        'expected': {
            'marks': [
                'A', 'B', 'B', 'writein10', 'Tuwi', BallotMarks.WRITEIN, 'uwi',
                BallotMarks.OVERVOTE
            ],
            'unique_marks': {
                'A', 'B', 'writein10', 'Tuwi', BallotMarks.WRITEIN, 'uwi',
                BallotMarks.OVERVOTE
            },
            'unique_candidates':
            {'A', 'B', 'writein10', 'Tuwi', 'uwi', BallotMarks.WRITEIN}
        }
    }),
    ({
Пример #14
0
    def _compute_cvr_stat_table(self) -> None:
        """Build the per-ballot statistics table and store it in ``self._cvr_stat_table``.

        The table has one row per ballot of ``self.get_cvr_dict()``. Besides the ballot weight it
        records how many unique candidates were ranked, the first non-skipped mark, and boolean
        flags for undervotes, overvotes, skipped ranks followed by another mark, duplicate
        rankings, irregular ballots and fully ranked ballots.
        """
        cvr = self.get_cvr_dict()
        candidates = self.get_candidates()

        ballots = cvr['ballot_marks']
        skipped = BallotMarks.SKIPPED
        overvote = BallotMarks.OVERVOTE

        stats = pd.DataFrame()
        stats['weight'] = cvr['weight']

        stats['valid_ranks_used'] = [len(ballot.unique_candidates) for ballot in ballots]
        stats['ranks_used_times_weight'] = stats['valid_ranks_used'] * stats['weight']

        stats['used_last_rank'] = [ballot.marks[-1] != skipped for ballot in ballots]

        # an undervote is a ballot whose only distinct mark is the skipped mark
        stats['undervote'] = [ballot.unique_marks == {skipped} for ballot in ballots]
        stats['ranked_single'] = stats['valid_ranks_used'] == 1
        stats['ranked_multiple'] = stats['valid_ranks_used'] > 1
        stats['ranked_3_or_more'] = stats['valid_ranks_used'] > 2

        # first mark left once skipped ranks are dropped; 'NA' when nothing is left
        without_skipped = [BallotMarks.remove_mark(ballot, [skipped]) for ballot in ballots]
        stats['first_round'] = pd.Series(
            [ballot.marks[0] if ballot.marks else 'NA' for ballot in without_skipped],
            dtype='category')

        stats['first_round_overvote'] = stats['first_round'].eq(overvote)

        stats['contains_overvote'] = [overvote in ballot.unique_marks for ballot in ballots]

        # contains_skip
        # true when some skipped rank is immediately followed by a rank that is not skipped,
        # i.e. the ballot contains marks after a skipped rank
        stats['contains_skip'] = [
            any(mark == skipped and following != skipped
                for mark, following in zip(ballot.marks, ballot.marks[1:]))
            for ballot in ballots
        ]

        # contains_duplicate
        # drop skipped and overvote marks, then look for any mark that occurs more than once
        candidate_marks_only = [BallotMarks.remove_mark(ballot, [skipped, overvote]) for ballot in ballots]
        stats['contains_duplicate'] = [
            any(count > 1 for count in collections.Counter(ballot.marks).values())
            for ballot in candidate_marks_only
        ]

        stats['irregular'] = stats[['contains_overvote', 'contains_skip', 'contains_duplicate']].any(axis='columns')

        # fully_ranked
        # candidate set with write-ins combined and then removed
        merged_writeins = BallotMarks.combine_writein_marks(candidates)
        candidate_set = BallotMarks.remove_mark(merged_writeins, [BallotMarks.WRITEIN]).unique_candidates

        # ballots with overvotes, skipped ranks and duplicate candidate marks removed
        cleaned_ballots = [
            BallotMarks.remove_duplicate_candidate_marks(BallotMarks.remove_mark(ballot, [overvote, skipped]))
            for ballot in ballots
        ]

        stats['fully_ranked'] = [
            # voter ranked every candidate in the set ...
            candidate_set <= set(cleaned.marks)
            # ... or cleaning removed nothing: no skipped ranks, overvotes or duplicates
            or len(raw.marks) == len(cleaned.marks)
            for raw, cleaned in zip(ballots, cleaned_ballots)
        ]

        self._cvr_stat_table = stats
Пример #15
0
    def _compute_summary_cvr_stat_table(self) -> None:
        """Aggregate the per-ballot table into a one-row summary.

        Reads ``self._cvr_stat_table`` and stores the result, a single-row DataFrame with one
        column per statistic, in ``self._summary_cvr_stat_table``. Totals are sums of ballot
        weights.
        """
        cvr = self.get_cvr_dict()
        candidates = self.get_candidates()
        table = self._cvr_stat_table

        def weighted_total(flag_column):
            # Sum of the weights of the ballots flagged True in `flag_column`.
            return table.loc[table[flag_column], 'weight'].sum()

        summary = pd.Series(dtype=object)

        # candidates counted with write-ins combined and then removed
        named_candidates = BallotMarks.remove_mark(
            BallotMarks.combine_writein_marks(candidates), [BallotMarks.WRITEIN])
        n_candidates = len(named_candidates.marks)
        # rank limit is taken from the length of the first ballot's mark list
        rank_limit = len(cvr['ballot_marks'][0].marks)

        summary['n_candidates'] = n_candidates
        summary['rank_limit'] = rank_limit
        summary['restrictive_rank_limit'] = rank_limit < (n_candidates - 1)

        # Ballots with an overvote before any valid ranking. This is not the same as
        # "exhausted by overvote": some jurisdictions (Maine) discard any ballot beginning
        # with two skipped rankings and call it exhausted by skipped rankings even if an
        # overvote follows, while others (Minneapolis) simply skip over overvotes.
        summary['first_round_overvote'] = weighted_total('first_round_overvote')

        # Voters that validly used only a single ranking.
        summary['ranked_single'] = weighted_total('ranked_single')

        # Voters that validly used 3 or more rankings.
        summary['ranked_3_or_more'] = weighted_total('ranked_3_or_more')

        # Voters that validly used more than one ranking.
        summary['ranked_multiple'] = weighted_total('ranked_multiple')

        # Voters that validly used all available rankings on the ballot, or that validly
        # ranked all non-write-in candidates.
        summary['total_fully_ranked'] = weighted_total('fully_ranked')

        # Ballots that rank the same candidate more than once.
        summary['includes_duplicate_ranking'] = weighted_total('contains_duplicate')

        # Ballots with a skipped ranking followed by any other marked ranking.
        summary['includes_skipped_ranking'] = weighted_total('contains_skip')

        # All ballots, including those with no marks.
        summary['total_ballots'] = table['weight'].sum()

        # Ballots with a duplicate ranking, an overvote, or a skipped ranking followed by a
        # mark, whether or not the irregularity was the cause of exhaustion.
        summary['total_irregular'] = weighted_total('irregular')

        # Ballots with at least one overvote. Not necessarily the cause of exhaustion.
        summary['includes_overvote_ranking'] = weighted_total('contains_overvote')

        # Ballots completely made up of skipped rankings (no marks).
        summary['total_undervote'] = weighted_total('undervote')

        # Mean and median below are taken over the non-undervote ballots only.
        voted = table.loc[~table['undervote']]

        # Weighted mean number of validly used rankings.
        summary['mean_rankings_used'] = voted['ranks_used_times_weight'].sum() / voted['weight'].sum()

        # Weighted median number of validly used rankings; weights are converted to float
        # before being handed to weightedstats.
        ranks_used = voted['valid_ranks_used'].tolist()
        float_weights = [float(weight) for weight in voted['weight'].tolist()]
        summary['median_rankings_used'] = weightedstats.weighted_median(ranks_used, weights=float_weights)

        self._summary_cvr_stat_table = summary.to_frame().transpose()
Пример #16
0
    def __init__(
            self,
            exhaust_on_duplicate_candidate_marks: bool = False,
            exhaust_on_overvote_marks: bool = False,
            exhaust_on_repeated_skipped_marks: bool = False,
            treat_combined_writeins_as_exhaustable_duplicates: bool = True,
            combine_writein_marks: bool = True,
            exclude_writein_marks: bool = False,
            n_winners: Optional[int] = None,
            multi_winner_rounds: Optional[bool] = None,
            *args,
            **kwargs) -> None:
        """Initialise the CVR, register the contest rule set, run the contest and compute its statistics.

        Args:
            exhaust_on_duplicate_candidate_marks, exhaust_on_overvote_marks,
            exhaust_on_repeated_skipped_marks, treat_combined_writeins_as_exhaustable_duplicates,
            combine_writein_marks, exclude_writein_marks: passed through to
                ``BallotMarks.new_rule_set`` for the ``'__contest'`` rule set. That rule set always
                sets ``exclude_duplicate_candidate_marks``, ``exclude_overvote_marks`` and
                ``exclude_skipped_marks`` to True.
            n_winners: stored as ``self._n_winners``.
            multi_winner_rounds: stored as ``self._multi_winner_rounds``.
            *args, **kwargs: forwarded to the parent constructor.
        """
        # INIT CVR
        super().__init__(*args, **kwargs)

        # APPLY CONTEST RULES
        contest_rules = BallotMarks.new_rule_set(
            combine_writein_marks=combine_writein_marks,
            exclude_writein_marks=exclude_writein_marks,
            exclude_duplicate_candidate_marks=True,
            exclude_overvote_marks=True,
            exclude_skipped_marks=True,
            treat_combined_writeins_as_exhaustable_duplicates=treat_combined_writeins_as_exhaustable_duplicates,
            exhaust_on_duplicate_candidate_marks=exhaust_on_duplicate_candidate_marks,
            exhaust_on_overvote_marks=exhaust_on_overvote_marks,
            exhaust_on_repeated_skipped_marks=exhaust_on_repeated_skipped_marks,
        )
        self._contest_rule_set_name = '__contest'
        self.add_rule_set(self._contest_rule_set_name, contest_rules)

        # CONTEST INPUTS
        self._n_winners = n_winners
        self._multi_winner_rounds = multi_winner_rounds
        self._contest_candidates = self.get_candidates(self._contest_rule_set_name)
        self._contest_cvr_ld = None
        self._reset_ballots()

        # INIT STATE INFO

        # contest-level
        self._tab_num = 0
        self._tabulations = []

        # tabulation-level
        self._inactive_candidates = []
        self._removed_candidates = []

        # round-level
        self._round_num = 0
        self._round_winners = []
        self._round_loser = None

        # RUN
        # all state attributes above are set before the contest is run
        self._run_contest()

        # CONTEST STATS
        # each table attribute is initialised to None and then filled by its compute method
        self._contest_stat_table = None
        self._compute_contest_stat_table()

        self._summary_contest_stat_tables = None
        self._compute_summary_contest_stat_tables()

        self._summary_contest_split_stat_tables = None
def test_constructor_errors(error_type, input):
    """Constructing ``BallotMarks`` from ``input`` must raise ``error_type``."""
    make_ballot = BallotMarks

    with pytest.raises(error_type):
        make_ballot(input)
def test_apply_rules_errors():
    """A second ``apply_rules()`` call on the same ``BallotMarks`` is expected to raise ``RuntimeError``."""
    # Setup stays outside pytest.raises: a RuntimeError raised by the constructor or by the
    # first apply_rules() call must fail the test rather than satisfy the expectation.
    b = BallotMarks(['A', 'B', 'C'])
    b.apply_rules()

    with pytest.raises(RuntimeError):
        b.apply_rules()
def test_remove_mark_errors(error_type, input1, input2):
    """``BallotMarks.remove_mark(input1, input2)`` must raise ``error_type``."""
    remove_mark = BallotMarks.remove_mark

    with pytest.raises(error_type):
        remove_mark(input1, input2)