def test_median_votes_groupped_by(self):
    """Grouping by party should give each party's median vote per poll."""
    votes = pd.DataFrame(
        [
            ('Joao', 'PT', 'PB', 1),
            ('Joana', 'PT', 'PB', 1),
            ('Marcio', 'PT', 'PB', 0),
            ('Pedro', 'PSOL', 'PE', 0),
        ],
        columns=['name', 'party', 'state', 'poll1'],
    )
    # PT voted 1, 1, 0 (median 1); PSOL has a single 0 vote.
    expected = {'poll1': {'PT': 1, 'PSOL': 0}}

    grouped = Rollcall(votes).median_votes_groupped_by('party')

    self.assertEqual(grouped.to_dict(), expected)
def main(self, csv_path, majority_percentual=None, groupby=None,
         metric_method=RiceIndex().calculate_adjusted, **filters):
    """Calculate a metric (adjusted Rice Index by default) for polls in a CSV.

    The votes are loaded from ``csv_path``, then unanimous votes are
    removed, ``filters`` are applied and the votes are grouped by
    ``groupby``, in that order. If ``metric_method`` is truthy the metric
    is calculated over the resulting votes; otherwise the processed votes
    themselves are returned.

    Args:
        csv_path (string): Path to a CSV file with polls as columns and
            people/groups' votes as rows. The votes should be either 1 for
            YES, 2 for NO, or empty for Not Voted.
            NOTE(review): the example below uses 0 rather than 2 -- confirm
            which value encodes NO.
            Example:
                poll1,poll2,poll3
                0,,1
                1,1,1
        majority_percentual (float): Removes votes where the majority was
            greater than this percentual. Defaults to None.
        groupby (string): Column on the metadata to group the votes by.
            This is useful when you want to compare a larger party with a
            smaller one. You would set groupby = "party", so this method
            will get each party's most common vote to calculate the
            cohesion. Defaults to None.
        metric_method (function): Method that receives a list of votes and
            returns a score. For an example, check RiceIndex().calculate.
            Defaults to RiceIndex().calculate_adjusted. Pass a falsy value
            (e.g. None) to skip the metric and get the processed votes.
        filters (kwargs): dict of filters to limit which votes we consider
            when calculating the metric. Defaults to no filters.

    Returns:
        list(OrderedDict): With a ``metric_method``, a single-element list
        whose dict maps each poll name to its metric score. Without one,
        one dict per row of the processed votes, mapping column name to
        value (missing values become None); when ``groupby`` is set, the
        group label is inserted as the first column.
    """
    votes = (
        Rollcall.from_csv(csv_path)
        .remove_unanimous_votes(majority_percentual)
        .filter(filters)
        .median_votes_groupped_by(groupby)
    )

    if metric_method:
        metrics = self.calculate_metric(votes, metric_method)
        return [collections.OrderedDict(zip(votes.columns, metrics))]

    if groupby:
        # Expose the group labels (currently the index) as a regular column.
        votes.insert(0, groupby, votes.index)

    columns = votes.columns.tolist()
    # Cast to object before swapping NaN for None: on float columns recent
    # pandas versions keep the float dtype in ``where`` and turn the None
    # back into NaN, so the replacement would silently not happen.
    cleaned = votes.astype(object).where(pd.notnull(votes), None)

    return [
        collections.OrderedDict(
            zip(columns,
                [self._convert_to_int_if_possible(value) for value in row])
        )
        for row in cleaned.itertuples(index=False)
    ]