Пример #1
0
    def test_median_votes_groupped_by(self):
        # Fixture: three PT voters (votes 1, 1, 0) and a single PSOL voter
        # (vote 0), so the per-party median on poll1 is 1 for PT and 0 for
        # PSOL.
        fields = ('name', 'party', 'state', 'poll1')
        voters = (
            ('Joao', 'PT', 'PB', 1),
            ('Joana', 'PT', 'PB', 1),
            ('Marcio', 'PT', 'PB', 0),
            ('Pedro', 'PSOL', 'PE', 0),
        )
        data = pd.DataFrame([dict(zip(fields, voter)) for voter in voters])
        expected_result = {'poll1': {'PT': 1, 'PSOL': 0}}

        grouped = Rollcall(data).median_votes_groupped_by('party')

        self.assertEqual(grouped.to_dict(), expected_result)
Пример #2
0
    def main(self, csv_path,
             majority_percentual=None, groupby=None,
             metric_method=RiceIndex().calculate_adjusted, **filters):
        """Calculate a cohesion metric for the polls contained in a CSV file.

        The CSV is loaded through ``Rollcall.from_csv`` and then passed, in
        order, through ``remove_unanimous_votes``, ``filter`` and
        ``median_votes_groupped_by`` before the metric is computed.

        Args:
            csv_path (string): Path to a CSV file with polls as columns and
                people/groups' votes as rows. The votes should be either 1 for
                YES, 0 for NO, or empty for Not Voted. Example:
                    poll1,poll2,poll3
                    0,,1
                    1,1,1
                NOTE(review): this text previously said "2 for NO", which
                contradicted the example rows above; confirm the encoding
                against the metric implementation.
            majority_percentual (float): Removes votes where the majority was
                greater than this percentual. Defaults to None.
            groupby (string): Column on the metadata to group the votes by.
                This is useful when you want to compare a larger party with a
                smaller one. You would set groupby = "party", so this method
                will get each party's most common vote to calculate the
                cohesion. Defaults to None.
            metric_method (function): Method that receives a list of votes and
                returns a score. For an example, check RiceIndex().calculate.
                Defaults to RiceIndex().calculate_adjusted. Pass a falsy
                value (e.g. None) to skip the metric and get the processed
                votes back instead.
            filters (kwargs): Filters to limit which votes we consider when
                calculating the metric. They are collected into a dict and
                handed to ``Rollcall.filter``; the dict is empty when no
                filter is given.

        Returns:
            list(OrderedDict): When `metric_method` is set, a single-element
                list whose dict maps each poll name to its metric score.
                When there's no `metric_method`, one dict per row of the
                processed votes (unanimous votes removed, filters and
                grouping applied), mapping column name to value, with
                missing votes represented as None. If `groupby` is set, the
                group label is included as the first column.
        """
        votes = (
            Rollcall.from_csv(csv_path)
            .remove_unanimous_votes(majority_percentual)
            .filter(filters)
            .median_votes_groupped_by(groupby)
        )

        if metric_method:
            metrics = self.calculate_metric(votes, metric_method)
            return [collections.OrderedDict(zip(votes.columns, metrics))]

        if groupby:
            # The group labels live in the index; expose them as the first
            # column so they show up in every output row.
            votes.insert(0, groupby, votes.index)

        columns = votes.columns.tolist()
        # Cast to object before masking: on recent pandas versions a float
        # column cannot hold None, so ``where(..., None)`` without the cast
        # silently leaves NaN in place.
        votes_without_nan = votes.astype(object).where(pd.notnull(votes), None)
        return [
            collections.OrderedDict(
                zip(columns,
                    (self._convert_to_int_if_possible(value) for value in row))
            )
            for row in votes_without_nan.itertuples(index=False)
        ]