def _ky_candidate_totals(contributions):
    """Aggregate contributions per candidate.

    Groups ``contributions`` by ``cand_nm`` and returns a table holding the
    number of contributions and the summed ``contb_receipt_amt`` for each
    candidate, ordered by that sum from largest to smallest.
    """
    totals = contributions.group_by('cand_nm').aggregate([
        ('contributions_count', agate.Count()),
        ('contributions_sum', agate.Sum('contb_receipt_amt'))
    ])
    return totals.order_by('contributions_sum', reverse=True)


def _write_ky_candidate_entries(totals, status):
    """Write one JavaScript object literal per row of ``totals``.

    Each row is expected to be (cand_nm, contributions_count,
    contributions_sum); ``status`` is emitted verbatim as the ``status`` field.

    NOTE(review): the candidate name is concatenated into the literal without
    any escaping, so a name containing a double quote would yield invalid
    JavaScript.
    """
    for row in totals.rows:
        generated_js.write('{name: "' + row[0] + '", count: ' + str(row[1]) +
                           ', sum: ' + str(row[2]) + ', status: "' + status +
                           '"},')


def ky_by_candidate():
    """Write the ``candidate_contributions`` JavaScript array to ``generated_js``.

    Contributions whose ``cmte_id`` is in ``current_candidate_cmte_ids`` are
    reported first with status ``"current"``.  All remaining contributions are
    reported with status ``"dropped"``, but only for candidates whose summed
    contributions exceed 25000.  Within each status the entries are ordered by
    summed contributions, largest first.
    """
    generated_js.write('candidate_contributions = [')

    current = ky_candidate_contributions.where(
        lambda r: r['cmte_id'] in current_candidate_cmte_ids)
    _write_ky_candidate_entries(_ky_candidate_totals(current), 'current')

    dropped = ky_candidate_contributions.where(
        lambda r: r['cmte_id'] not in current_candidate_cmte_ids)
    # Filtering after the sort keeps the remaining rows in sorted order.
    dropped_totals = _ky_candidate_totals(dropped).where(
        lambda r: r['contributions_sum'] > 25000)
    _write_ky_candidate_entries(dropped_totals, 'dropped')

    generated_js.write(']\n')
def summarize():
    """Write one per-region CSV comparing 1965 and 2015 column sums.

    For every ``(region, states, population)`` entry in ``REGIONS`` the 1965
    and 2015 tables are restricted to rows whose ``state`` is in ``states``.
    Each column listed after the first two entries of ``COLUMNS`` is then
    summed for both years, and the sums, their per-capita values and the
    fractional changes are frozen to
    ``processed-data/<region>-sums.csv``.
    """
    raw_1965 = agate.Table.from_csv('processed-data/1965.csv', COLUMNS)
    raw_2015 = agate.Table.from_csv('processed-data/2015.csv', COLUMNS)

    for region, states, population in REGIONS:
        def in_region(row):
            return row['state'] in states

        regional_1965 = raw_1965.where(in_region)
        regional_2015 = raw_2015.where(in_region)

        output = []
        for col_name, _col_type in COLUMNS[2:]:
            sum_1965 = regional_1965.columns[col_name].aggregate(agate.Sum())
            per_capita_1965 = sum_1965 / population['1965']
            sum_2015 = regional_2015.columns[col_name].aggregate(agate.Sum())
            # The 2015 figures are divided by the population keyed '2014'.
            per_capita_2015 = sum_2015 / population['2014']

            output.append(OrderedDict([
                ('var', col_name),
                ('1965', sum_1965),
                ('1965_per_capita', per_capita_1965),
                ('2015', sum_2015),
                ('2015_per_capita', per_capita_2015),
                ('absolute_percent_change',
                 (sum_2015 - sum_1965) / sum_1965),
                ('per_capita_percent_change',
                 (per_capita_2015 - per_capita_1965) / per_capita_1965),
            ]))

        dataset.freeze(output, format='csv',
                       filename='processed-data/{0}-sums.csv'.format(region))
def write_weighted_means_csv():
    """Write population-weighted mean scores per county type to CSV.

    For each of the rural, small-town and metro tables, and for every
    combination of ``ages`` and ``incomes``, the summed
    ``weighted_score_<age>yo_<income>k`` column is divided by the table's
    summed ``Population``.  The result is one row per county type, written to
    ``data/output/weighted_means.csv``.
    """
    column_names = ['county_type']
    column_types = [text_type]
    for age in ages:
        for income in incomes:
            column_names.append('weighted_mean_{0}yo_{1}k'.format(age, income))
            column_types.append(number_type)

    county_types = [
        (rural_weighted, 'rural'),
        (small_towns_weighted, 'small_towns'),
        (metro_weighted, 'metro'),
    ]

    rows = []
    for weighted_table, label in county_types:
        row = [label]
        total_population = weighted_table.aggregate(agate.Sum('Population'))
        # Same age/income nesting as the header loop, so values line up with
        # column_names.
        for age in ages:
            for income in incomes:
                score = weighted_table.aggregate(
                    agate.Sum('weighted_score_{0}yo_{1}k'.format(age, income)))
                row.append(score / total_population)
        rows.append(row)

    # The result of to_csv() is not a table, so it is deliberately not bound
    # to a name.
    agate.Table(rows, column_names, column_types).to_csv(
        'data/output/weighted_means.csv')
def year_sum_counts(data):
    """Store per-year totals under ``data['groupped_year']`` and return ``data``.

    ``data['table']`` is grouped by ``year``; each group gets the summed
    ``killed`` and ``injured`` columns, a row count (``accidents``) and the
    result of the ``count_accidents_injured`` aggregation.
    """
    yearly_aggregations = [
        ('killed', agate.Sum('killed')),
        ('injured', agate.Sum('injured')),
        ('accidents', agate.Count()),
        ('accidents_injured', count_accidents_injured),
    ]
    by_year = data['table'].group_by('year')
    data['groupped_year'] = by_year.aggregate(yearly_aggregations)
    return data
def calculate_trump_pct(table):
    """Return Trump's share of all counted votes in ``table`` as a percentage.

    The denominator is the sum of the ``<candidate>_votecount`` columns for
    Trump, Clinton, Johnson, Stein, McMullin and "other".
    """
    trump_votes = table.aggregate(agate.Sum('trump_votecount'))
    rival_votes = sum(
        table.aggregate(agate.Sum('{0}_votecount'.format(name)))
        for name in ('clinton', 'johnson', 'stein', 'mcmullin', 'other'))
    all_votes = trump_votes + rival_votes
    return (trump_votes / all_votes) * 100
def statistics(data):
    """Store whole-table summary figures under ``data['statistics']``.

    Computes the sums of ``killed`` and ``injured``, the row count
    (``accidents``) and the means of the ``accidents``, ``killed`` and
    ``injured`` columns of ``data['table']``.  Returns ``data``.
    """
    summary_aggregations = [
        ('killed', agate.Sum('killed')),
        ('injured', agate.Sum('injured')),
        ('accidents', agate.Count()),
        # NOTE(review): this requires an 'accidents' column to exist in
        # data['table'] - confirm against the table's schema.
        ('mean_accidents', agate.Mean('accidents')),
        ('mean_killed', agate.Mean('killed')),
        ('mean_injured', agate.Mean('injured')),
    ]
    source_table = data['table']
    data['statistics'] = source_table.aggregate(summary_aggregations)
    return data
def year_police_beat_sum_counts(data):
    """Store ranked per-year, per-police-beat totals and return ``data``.

    ``data['table']`` is grouped by ``year`` and then ``police_beat``.  Each
    group gets summed ``killed`` and ``injured`` columns plus a row count
    (``accidents``); four rank columns are then computed with the
    ``RankWeightedAccidents`` and ``GroupRanking`` computations.  The result
    is stored under ``data['year_police_beat']``.
    """
    by_year_and_beat = data['table'].group_by('year').group_by('police_beat')
    totals = by_year_and_beat.aggregate([
        ('killed', agate.Sum('killed')),
        ('injured', agate.Sum('injured')),
        ('accidents', agate.Count()),
    ])
    data['year_police_beat'] = totals.compute([
        ('weighted_rank', RankWeightedAccidents('year')),
        ('killed_rank', GroupRanking('killed', 'year')),
        ('accidents_rank', GroupRanking('accidents', 'year')),
        ('injured_rank', GroupRanking('injured', 'year')),
    ])
    return data
def subset(data):
    """Export yearly totals for the selected countries from 1980 onwards.

    Rows of ``data['table']`` whose ``origin`` is in ``SELECTED_COUNTRIES``
    and whose ``year`` is at least 1980 are grouped by a combined
    "year/origin" key and summed per population category.  The per-group
    table is written to ``subset.csv`` and a year-by-origin pivot of the
    ``total`` column to ``subset_pivot.csv``.
    """
    def is_selected(r):
        return r['origin'] in SELECTED_COUNTRIES and r['year'] >= 1980

    def year_and_origin(r):
        return '/'.join([str(r['year']), r['origin']])

    def key_part(index):
        # Recover one half of the combined "year/origin" grouping key.
        return agate.Formula(
            agate.Text(), lambda r: r['year_and_origin'].split('/')[index])

    selected = data['table'].where(is_selected)
    groups = selected.group_by(year_and_origin, key_name='year_and_origin')

    summed_columns = [
        'refugees', 'asylum_seekers', 'returned_refugees', 'idps',
        'returned_idps', 'stateless_persons', 'others', 'total',
    ]
    refugees = groups.aggregate(
        [(name, agate.Sum(name)) for name in summed_columns])
    refugees = refugees.order_by('year_and_origin', reverse=True)

    refugees = refugees.compute([
        ('year', key_part(0)),
        ('origin', key_part(1)),
    ])

    # 'returned_refugees' is summed above but is not part of the export.
    refugees = refugees.select([
        'origin', 'year', 'refugees', 'asylum_seekers', 'idps',
        'returned_idps', 'stateless_persons', 'others', 'total',
    ])
    refugees.to_csv('subset.csv')

    pivoted = refugees.pivot('year', 'origin', agate.Sum('total'))
    pivoted.order_by('year').to_csv('subset_pivot.csv')
def count_years(data):
    """Print yearly refugee sums and save yearly overall totals.

    Both results are aggregated from ``data['by_year']`` and ordered by
    ``year``.  The summed ``refugees`` column is printed as a table; the
    summed ``total`` column is written to ``years.csv``.
    """
    def yearly_sum(output_name, column):
        summed = data['by_year'].aggregate([
            (output_name, agate.Sum(column)),
        ])
        return summed.order_by('year')

    yearly_sum('total_refugees', 'refugees').print_table()
    yearly_sum('total_total', 'total').to_csv('years.csv')
def sum_counts_by_hour(data):
    """Store per-hour totals with derived columns under ``data['hour']``.

    ``data['table']`` is grouped by ``hour`` and aggregated (sums of
    ``killed`` and ``injured``, a row count, and ``count_accidents_injured``).
    A first ``compute`` pass adds percentage columns; a second adds the
    ``weighted`` column (killed percent plus injured percent) and three
    ``StandardDeviations`` columns.  Returns ``data``.
    """
    def weighted(r):
        return r['killed_percent'] + r['injured_percent']

    hourly_totals = data['table'].group_by('hour').aggregate([
        ('killed', agate.Sum('killed')),
        ('injured', agate.Sum('injured')),
        ('accidents', agate.Count()),
        ('accidents_injured', count_accidents_injured),
    ])

    with_percents = hourly_totals.compute([
        ('killed_percent', agate.Percent('killed')),
        ('injured_percent', agate.Percent('injured')),
        ('accidents_percent', agate.Percent('accidents')),
    ])

    # The second pass depends on the percent columns added by the first.
    data['hour'] = with_percents.compute([
        ('weighted', agate.Formula(agate.Number(), weighted)),
        ('accidents_within_half_deviation',
         StandardDeviations('accidents', 0.5)),
        ('killed_within_half_deviation', StandardDeviations('killed', 0.5)),
        ('injured_within_half_deviation', StandardDeviations('injured', 0.5)),
    ])
    return data
def graphic(data):
    """Write per-country and overall yearly totals to ``src/data/refugees.json``.

    ``data['grouped']`` is set to the ``total`` column summed per origin and
    year, with rows named ``"<origin>-<year>"``.  For each country in
    ``SELECTED_COUNTRIES`` a list is built with one value per year from
    ``FIRST_YEAR`` through 2014 (``None`` where no row exists), followed by
    the country's ``MID_YEAR_2015`` value.  A ``'total'`` series over all
    origins is built the same way, except that a missing year raises
    ``KeyError`` instead of producing ``None``.
    """
    origin_year_totals = (
        data['table'].group_by('origin').group_by('year').aggregate([
            ('total', agate.Sum('total'))
        ]))
    data['grouped'] = origin_year_totals.rename(
        row_names=lambda r: '%(origin)s-%(year)s' % r)

    countries = {}

    for country in SELECTED_COUNTRIES:
        series = []
        for year in range(FIRST_YEAR, 2015):
            try:
                row_name = '%s-%s' % (country, year)
                series.append(data['grouped'].rows[row_name]['total'])
            except KeyError:
                # No data for this country in this year.
                series.append(None)
        series.append(MID_YEAR_2015[country])
        countries[country] = series

    yearly_totals = data['table'].group_by('year').aggregate([
        ('total', agate.Sum('total'))
    ])
    yearly_totals = yearly_totals.rename(row_names=lambda r: str(r['year']))

    overall = [
        yearly_totals.rows[str(year)]['total']
        for year in range(FIRST_YEAR, 2015)
    ]
    overall.append(MID_YEAR_2015['total'])
    countries['total'] = overall

    with open('src/data/refugees.json', 'w') as f:
        json.dump(countries, f, cls=DecimalEncoder)
def worst_country_year(data):
    """Print the 30 origin/year combinations with the largest ``total``.

    ``data['table']`` is grouped by a combined ``"<origin> / <year>"`` key,
    every population category is summed per group, and the result is ordered
    by ``total`` from largest to smallest before printing.
    """
    def origin_and_year(r):
        return ' / '.join([r['origin'], str(r['year'])])

    country_year = data['table'].group_by(
        origin_and_year, key_name='origin_and_year')

    summed_columns = [
        'refugees', 'asylum_seekers', 'returned_refugees', 'idps',
        'returned_idps', 'stateless_persons', 'others', 'total',
    ]
    sums = country_year.aggregate(
        [(name, agate.Sum(name)) for name in summed_columns])
    ranked = sums.order_by('total', reverse=True)
    ranked.print_table(30)
def to_and_from(data):
    """Compare refugees leaving and arriving per country and year.

    Refugee counts are summed per year twice: once by ``origin`` and once by
    ``residence``.  The two results are joined on (year, country) and a
    ``ratio`` column is added, then the table is written to ``joined.csv``.
    """
    refugees = data['table'].select(
        ['origin', 'residence', 'year', 'refugees'])
    by_year = refugees.group_by('year')

    def yearly_totals(column, output_name):
        return by_year.group_by(column).aggregate([
            (output_name, agate.Sum('refugees'))
        ])

    by_origin = yearly_totals('origin', 'origin_refugees')
    by_residence = yearly_totals('residence', 'residence_refugees')

    def comparison(r):
        """Return 1 - |origin - residence| / (origin + residence).

        The value is 1 when both counts are equal; ``None`` is returned when
        either count is missing or zero.
        """
        origin = r['origin_refugees']
        residence = r['residence_refugees']

        if not (origin and residence):
            return None

        return 1 - (abs(origin - residence) / (origin + residence))

    joined = by_origin.join(
        by_residence,
        lambda r: (r['year'], r['origin']),
        lambda r: (r['year'], r['residence']))
    # Drop the right-hand table's duplicate key columns.
    joined = joined.exclude(['residence', 'year2'])
    joined = joined.rename(column_names={'origin': 'country'})
    joined = joined.compute([
        ('ratio', agate.Formula(agate.Number(), comparison))
    ])

    joined.to_csv('joined.csv')
def print_ky_current_candidate_sum():
    """Print how much the current candidates have received.

    Only contributions whose ``cmte_id`` is in ``current_candidate_cmte_ids``
    are counted.  The number of candidates reported is the number of IDs in
    that collection.
    """
    def is_current(r):
        return r['cmte_id'] in current_candidate_cmte_ids

    current_contributions = ky_candidate_contributions.where(is_current)
    donation_count = current_contributions.aggregate(agate.Count())
    donation_sum = current_contributions.aggregate(
        agate.Sum('contb_receipt_amt'))
    candidate_count = len(current_candidate_cmte_ids)

    print(''.join([
        'There are currently ', str(candidate_count),
        ' candidates running for president. Those ', str(candidate_count),
        ' candidates have received ', str(donation_count),
        ' donations totaling $', str(donation_sum),
    ]))
def print_ky_overall_summary():
    """Report the count and sum of all rows in ``ky_all_contributions``.

    The figures are printed as a sentence and also written to
    ``generated_js`` as the ``total_donated_sum`` and ``total_donated_count``
    assignments.
    """
    # Total amount donated: the sum of the TRANSACTION_AMT column.
    donated_sum = str(
        ky_all_contributions.aggregate(agate.Sum('TRANSACTION_AMT')))

    # Number of contributions: one per row.
    donated_count = str(ky_all_contributions.aggregate(agate.Count()))

    print(''.join([
        donated_count, ' donations, totaling $', donated_sum,
        ' have been donated by Kentuckians to the 2016 presidential race.',
    ]))

    generated_js.write(''.join([
        'total_donated_sum = ', donated_sum,
        '\ntotal_donated_count = ', donated_count, '\n',
    ]))
def top_ky_donors_candidates():
    """Print all donor totals and write the top five to ``generated_js``.

    ``ky_candidate_contributions`` is grouped by ``contbr_nm``; each donor
    gets a contribution count and a summed ``contb_receipt_amt``.  The table
    is ordered by that sum, largest first, printed, and its first five rows
    are emitted as the ``top_donors_to_candidates`` JavaScript array.
    """
    donor_totals = ky_candidate_contributions.group_by('contbr_nm').aggregate([
        ('contributions_count', agate.Count()),
        ('contributions_sum', agate.Sum('contb_receipt_amt'))
    ])
    ranked_donors = donor_totals.order_by('contributions_sum', reverse=True)
    ranked_donors.print_table()

    generated_js.write('top_donors_to_candidates = [')
    for row in itertools.islice(ranked_donors.rows, 5):
        # Columns are (contbr_nm, contributions_count, contributions_sum).
        generated_js.write(''.join([
            '{name: "', row[0], '", count: ', str(row[1]),
            ', sum: ', str(row[2]), '},',
        ]))
    generated_js.write(']\n')
def top_ky_donors_pac():
    """Print all donor totals and write the top five to ``generated_js``.

    ``ky_all_contributions`` is grouped by ``NAME``; each donor gets a
    contribution count and a summed ``TRANSACTION_AMT``.  The table is
    ordered by that sum, largest first, printed, and its first five rows are
    emitted as the ``top_donors_to_pacs`` JavaScript array.
    """
    donor_totals = ky_all_contributions.group_by('NAME').aggregate([
        ('contributions_count', agate.Count()),
        ('contributions_sum', agate.Sum('TRANSACTION_AMT'))
    ])
    ranked_donors = donor_totals.order_by('contributions_sum', reverse=True)
    ranked_donors.print_table()

    generated_js.write('top_donors_to_pacs = [')
    for row in itertools.islice(ranked_donors.rows, 5):
        # Columns are (NAME, contributions_count, contributions_sum).
        generated_js.write(''.join([
            '{name: "', row[0], '", count: ', str(row[1]),
            ', sum: ', str(row[2]), '},',
        ]))
    generated_js.write(']\n')
def print_ky_candidate_summary():
    """Report the count and sum of all rows in ``ky_candidate_contributions``.

    The figures are printed as a sentence and also written to
    ``generated_js`` as the ``total_candidate_donated_sum`` and
    ``total_candidate_donated_count`` assignments.
    """
    # Total amount donated: the sum of the contb_receipt_amt column.
    candidate_sum = str(
        ky_candidate_contributions.aggregate(agate.Sum('contb_receipt_amt')))

    # Number of contributions: one per row.
    candidate_count = str(
        ky_candidate_contributions.aggregate(agate.Count()))

    print(''.join([
        candidate_count, ' donations, totaling $', candidate_sum,
        ' have been donated by Kentuckians specifically to the 2016 presidential candidates.',
    ]))

    generated_js.write(''.join([
        'total_candidate_donated_sum = ', candidate_sum,
        '\ntotal_candidate_donated_count = ', candidate_count, '\n',
    ]))
def _summary_json_default(value):
    """Serialize the Decimal values produced by agate aggregations.

    Finite integral Decimals become ``int``; every other Decimal becomes
    ``float``.  Any other type is rejected, matching the behaviour of the
    default JSON encoder.
    """
    import decimal

    if isinstance(value, decimal.Decimal):
        if value.is_finite() and value == value.to_integral_value():
            return int(value)
        return float(value)
    raise TypeError(
        'Object of type {0} is not JSON serializable'.format(
            type(value).__name__))


def organisations(event, context):
    """Export all organisations and a summary, then upload both files.

    The table from ``get_all_orgs()`` is written to a temporary CSV and
    uploaded as ``open_data_cities.csv``.  The row count and the sum of the
    ``datasets`` column are written to a temporary JSON file and uploaded as
    ``open_data_cities_summary.json``.

    Args:
        event: Incoming event; echoed back under ``"input"`` in the body.
        context: Not used.

    Returns:
        A dict with ``statusCode`` 200 and a JSON-encoded ``body``.
    """
    table = get_all_orgs()
    table.to_csv('/tmp/open_data_germany.csv')
    upload_file_to_s3('open_data_cities.csv', '/tmp/open_data_germany.csv')

    aggregates = table.aggregate([
        ('count', agate.Count()),
        ('sum', agate.Sum('datasets'))
    ])

    with open('/tmp/summary.json', 'w') as f:
        # agate's Sum yields a Decimal, which json cannot encode by itself.
        json.dump(aggregates, f, default=_summary_json_default)

    # Upload only after the file has been closed, so its content is flushed.
    upload_file_to_s3('open_data_cities_summary.json', '/tmp/summary.json')

    body = {
        "message": "Go Serverless v1.0! Your function executed successfully!",
        "input": event
    }

    response = {
        "statusCode": 200,
        "body": json.dumps(body)
    }

    return response
def year_police_beat_sum_counts(data):
    """Store per-year, per-police-beat totals under ``data['year_police_beat']``.

    ``data['table']`` is grouped by ``year`` and then ``police_beat``; each
    group gets summed ``killed`` and ``injured`` columns plus a row count
    (``accidents``).  ``data`` is modified in place and nothing is returned.
    """
    beat_aggregations = [
        ('killed', agate.Sum('killed')),
        ('injured', agate.Sum('injured')),
        ('accidents', agate.Count()),
    ]
    by_year_and_beat = data['table'].group_by('year').group_by('police_beat')
    data['year_police_beat'] = by_year_and_beat.aggregate(beat_aggregations)
def sum_counts_by_full_hour(data):
    """Store totals per ``date_hour`` value under ``data['full_hour']``.

    Each group gets summed ``killed`` and ``injured`` columns plus a row
    count (``accidents``).  ``data`` is modified in place and nothing is
    returned.
    """
    hourly_aggregations = [
        ('killed', agate.Sum('killed')),
        ('injured', agate.Sum('injured')),
        ('accidents', agate.Count()),
    ]
    by_date_hour = data['table'].group_by('date_hour')
    data['full_hour'] = by_date_hour.aggregate(hourly_aggregations)
import agate

# The 'fips' column is forced to text instead of being type-inferred.
tester = agate.TypeTester(force={'fips': agate.Text()})

table = agate.Table.from_csv('examples/realdata/ks_1033_data.csv',
                             column_types=tester)

# Question 1: What was the total cost to Kansas City area counties?


def _in_kansas_city_area(r):
    """Return True for rows from the four counties containing Kansas City."""
    return r['county'] in ('JACKSON', 'CLAY', 'CASS', 'PLATTE')


# Filter to counties containing Kansas City
kansas_city = table.where(_in_kansas_city_area)

# Sum total_cost of four counties
kansas_city_total = kansas_city.columns['total_cost'].aggregate(agate.Sum())
print('Total for Kansas City area: %i' % kansas_city_total)

# Question 2: Which counties spent the most?

# Group by counties
counties = table.group_by('county')

# Aggregate totals for all counties, largest first
totals = counties.aggregate([('total_cost', agate.Sum(), 'total_cost_sum')])
totals = totals.order_by('total_cost_sum', reverse=True)

top_twenty = totals.limit(20)
top_twenty.print_bars('county', 'total_cost_sum', width=80)

print('Five most spendy counties:')

totals.print_table(5)
# The ' Date' column (note the leading space in the header) is parsed as a
# date in YYYY-MM-DD format.
tester = agate.TypeTester(force={' Date': agate.Date('%Y-%m-%d')})

emissions = agate.Table.from_csv('examples/epa-emissions-20150910.csv', tester)


def _value_or_zero(column_name):
    """Build a row function returning ``row[column_name]``, or 0 when falsy."""
    return lambda r: r[column_name] or 0


def _emission_sums():
    """Return a fresh list of sum aggregations for the three pollutants."""
    return [
        ('so2', agate.Sum(), 'so2'),
        ('co2', agate.Sum(), 'co2'),
        ('noX', agate.Sum(), 'noX')
    ]


# Add the day of the month and null-free copies of the pollutant columns.
emissions = emissions.compute([
    (agate.Formula(agate.Number(), lambda r: r[' Date'].day), 'day'),
    (agate.Formula(agate.Number(), _value_or_zero(' SO2 (tons)')), 'so2'),
    (agate.Formula(agate.Number(), _value_or_zero(' NOx (tons)')), 'noX'),
    (agate.Formula(agate.Number(), _value_or_zero(' CO2 (short tons)')), 'co2')
])

states = emissions.group_by('State')
state_totals = states.aggregate(_emission_sums())

new_york = states['NY']

# NB: key_type shouldn't be necessary--agate bug #234
days = emissions.group_by('day', key_type=agate.Number())
day_totals = days.aggregate(_emission_sums())

dates = emissions.group_by(' Date', key_type=agate.Date('%Y-%m-%d'))
# pref_corr.print_table()

# first assign block_forced courses TODO

# generate a table with a single column holding the block numbers 1..n_blocks
blockarray = [[b] for b in range(1, n_blocks + 1)]
blocksumtable_empty = agate.Table(blockarray, ['block'], [agate.Number()])

# put every course in a block
for course in courses:
    # print('course: ' + course)

    # Sum of 'corr' per block over the pref_corr rows joined to this course;
    # rows without a block are dropped.
    block_corr_sum = (
        pref_corr.join(courselist, 'course2', 'course')
        .where(lambda row: row['course'] == course)
        .pivot('block', aggregation=agate.Sum('corr'))
        .where(lambda row: row['block'] is not None)
    )

    # Joining onto the full list of block numbers keeps blocks without any
    # correlation rows; the result is ordered by (Sum, Count) ascending.
    block_corr_sum = (
        blocksumtable_empty.join(block_corr_sum, 'block', 'block')
        .pivot('block', aggregation=agate.Sum('Sum'))
        .join(courselist.pivot('block'), 'block', 'block')
        .order_by(lambda r: (r['Sum'], r['Count']))
    )
    # block_corr_sum.print_table()

    # check the repeats
    bestblocks = block_corr_sum.columns['block']
    repeats = shortlist.where(
        lambda r: r['course'] == course).columns['repeats'][0]
    # A course cannot be repeated more often than there are blocks.
    repeats = min(repeats, n_blocks)

    # NOTE(review): range() needs an integer; confirm 'repeats' is not a
    # Decimal when it comes from a Number column.
    for instance in range(1, repeats + 1):
        courseid = courselist.where(
            lambda r: (r['course'] == course) and
                      (r['instance'] == instance)).columns['id'][0]
import agate

# The 'fips' column is forced to text instead of being type-inferred.
tester = agate.TypeTester(force={
    'fips': agate.Text()
})

table = agate.Table.from_csv('examples/realdata/ks_1033_data.csv',
                             column_types=tester)

# Question 1: What was the total cost to Kansas City area counties?


def _in_kansas_city_area(r):
    """Return True for rows from the four counties containing Kansas City."""
    return r['county'] in ('JACKSON', 'CLAY', 'CASS', 'PLATTE')


# Filter to counties containing Kansas City
kansas_city = table.where(_in_kansas_city_area)

# Sum total_cost of four counties
kansas_city_total = kansas_city.aggregate(agate.Sum('total_cost'))
print('Total for Kansas City area: %i' % kansas_city_total)

# Question 2: Which counties spent the most?

# Group by counties
counties = table.group_by('county')

# Aggregate totals for all counties, largest first
totals = counties.aggregate([
    ('total_cost_sum', agate.Sum('total_cost'))
])
totals = totals.order_by('total_cost_sum', reverse=True)

top_twenty = totals.limit(20)
top_twenty.print_bars('county', 'total_cost_sum', width=80)

print('Five most spendy counties:')
def print_contributions_by_cmte_type():
    """Report Kentucky contributions to Republican and Democratic committees.

    For each party the number of contributions and the summed
    ``contb_receipt_amt`` are printed, and the sum is written to
    ``generated_js`` as ``to_republicans`` / ``to_democrats``.
    """
    # The committee IDs are hard-coded.  An earlier, disabled approach
    # derived them from cmte_list by selecting the CMTE_ID of every row whose
    # CMTE_PTY_AFFILIATION was 'REP' or 'DEM' respectively.
    rep_cmte_id_list = [
        'C00579458',  # Jeb Bush
        'C00573519',  # Carson
        'C00580399',  # Christie
        'C00574624',  # Cruz
        'C00577312',  # Fiorina
        'C00578757',  # Graham
        'C00577981',  # Huckabee
        'C00581876',  # Kasich
        'C00575449',  # Paul
        'C00458844',  # Rubio
        'C00578492',  # Santorum
        'C00580100',  # Trump
        'C00580480',  # Walker
    ]

    dem_cmte_id_list = [
        'C00575795',  # Clinton
        'C00583146',  # Lessig
        'C00578658',  # OMalley
        'C00577130',  # Sanders
        'C00581215',  # Webb
    ]

    # Run through all the individual contributions and pull out the ones made
    # to Republican committees and then those made to Democratic committees.
    rep_contributions = ky_candidate_contributions.where(
        lambda r: r['cmte_id'] in rep_cmte_id_list)
    dem_contributions = ky_candidate_contributions.where(
        lambda r: r['cmte_id'] in dem_cmte_id_list)

    def report(contributions, sentence_middle, js_assignment):
        # Print "<count><sentence_middle><sum>" and record the sum in the JS.
        contrib_count = contributions.aggregate(agate.Count())
        contrib_sum = contributions.aggregate(agate.Sum('contb_receipt_amt'))
        print(str(contrib_count) + sentence_middle + str(contrib_sum))
        generated_js.write(js_assignment + str(contrib_sum) + '\n')

    report(rep_contributions,
           ' contributions to Republican committees, totaling $',
           'to_republicans = ')
    report(dem_contributions,
           ' contributions to Democratic committees, totaling $',
           'to_democrats = ')
def candidate_time_charts():
    """Write per-candidate monthly chart series to the candidate charts file.

    The output file ``app/data/candidate_charts.js`` is rewritten from
    scratch and receives:

    * ``count_labels = [...]`` - the distinct ``month_year`` values (the last
      six characters of ``contb_receipt_dt``) of contributions dated after
      2015-02-28, in date order.  One label list is shared by all candidates.
    * ``<label>_count_series = [...]`` and ``<label>_sum_series = [...]`` for
      every candidate, where ``<label>`` is the lower-cased part of
      ``cand_nm`` before the first comma.  Each series holds one
      ``{'meta': ..., 'value': ...}`` entry per label; months without
      contributions get the value ``'0'``.
    """
    text_type = agate.Text()

    candidate_contribs_with_monthyear = ky_candidate_contributions.compute([
        ('month_year',
         agate.Formula(text_type, lambda r: r['contb_receipt_dt'][-6:])),
        ('date',
         agate.Formula(
             text_type,
             lambda r: datetime.datetime.strptime(
                 r['contb_receipt_dt'], '%d-%b-%y')))
    ])

    date_sorted_contribs = candidate_contribs_with_monthyear.order_by('date')
    restricted_date_contribs = date_sorted_contribs.where(
        lambda r: r['date'] > '2015-02-28 00:00:00')

    by_candidate_contribs = candidate_contribs_with_monthyear.group_by(
        'cand_nm')

    # Unique month_years in order of first appearance; these are the labels.
    month_years = []
    for row in restricted_date_contribs.rows:
        month_year = row['month_year']
        if month_year not in month_years:
            month_years.append(str(month_year))

    # Unique candidates in order of first appearance.
    candidates = []
    for row in candidate_contribs_with_monthyear.rows:
        candidate = row['cand_nm']
        if candidate not in candidates:
            candidates.append(candidate)

    candidate_month_year_groups = by_candidate_contribs.group_by(
        lambda r: r['month_year'], key_name='month_year_group')

    month_year_counts = candidate_month_year_groups.aggregate([
        ('contribution_count', agate.Count()),
        ('contribution_sum', agate.Sum('contb_receipt_amt'))
    ])

    # Index the aggregate rows once instead of rescanning the whole table for
    # every candidate/month pair.
    totals_by_key = {}
    for row in month_year_counts.rows:
        totals_by_key[(row['cand_nm'], row['month_year_group'])] = row

    def chart_point(prefix, candidate, month, value):
        # str() keeps the tooltip a plain string in the written repr.
        return {
            'meta': str(prefix + candidate + ' for ' + month),
            'value': value,
        }

    # Opening in 'w' mode truncates an existing file and creates a missing
    # one, so no separate removal step is needed; the with block closes the
    # file even when writing fails.
    with open('app/data/candidate_charts.js', 'w') as chart_js:
        chart_js.write('count_labels = ' + str(month_years) + '\n')

        for candidate in candidates:
            series_label = candidate.split(',')[0].lower()
            count_value_list = []
            sum_value_list = []

            for month in month_years:
                row = totals_by_key.get((candidate, month))
                if row is None:
                    # Nothing recorded for this candidate in this month.
                    count_value_list.append(
                        chart_point('Contributions to ', candidate, month,
                                    '0'))
                    sum_value_list.append(
                        chart_point('Amount contributed to ', candidate,
                                    month, '0'))
                else:
                    count_value_list.append(
                        chart_point('Contributions to ', candidate, month,
                                    str(row['contribution_count'])))
                    # The abbreviated wording for non-zero months is kept
                    # exactly as it was.
                    sum_value_list.append(
                        chart_point('Amt. contributed to ', candidate, month,
                                    str(row['contribution_sum'])))

            chart_js.write(series_label + '_count_series = ' +
                           str(count_value_list) + '\n')
            chart_js.write(series_label + '_sum_series = ' +
                           str(sum_value_list) + '\n')
def count_origins(data):
    """Print the 20 largest refugee sums from ``data['by_origin_2014']``.

    The ``refugees`` column is summed per group and the result is ordered by
    that sum, largest first, before printing.
    """
    origin_totals = data['by_origin_2014'].aggregate([
        ('total_refugees', agate.Sum('refugees'))
    ])
    ranked_origins = origin_totals.order_by('total_refugees', reverse=True)
    ranked_origins.print_table(20)
# find total minutes played unique_quarters = data_with_quarter.distinct('quarter_id').select(['quarter']) def getMinutes(row): if (row['quarter'] == 4): return 2 return 5 quarter_minutes = unique_quarters.compute([ ('minutes_played', agate.Formula(agate.Number(), getMinutes)) ]) minutes_played = int(quarter_minutes.aggregate(agate.Sum('minutes_played'))) num_calls_incorrect_per_minute = float(num_calls_incorrect) / float( minutes_played) # export summary data summary_column_names = ['key', 'value'] summary_column_types = [agate.Text(), agate.Text()] summary_rows = [ ('num_games', format(num_games, ',d')), ('num_calls', format(num_calls, ',d')), ('num_calls_incorrect', format(num_calls_incorrect, ',d')), ('minutes_played', format(minutes_played, ',d')), ('percent_incorrect', format(percent_incorrect, '.1f')), ('num_calls_incorrect_per_game', format(num_calls_incorrect_per_game,
import agate

# The 'fips' column is forced to text instead of being type-inferred.
tester = agate.TypeTester(force={'fips': agate.Text()})

table = agate.Table.from_csv('examples/realdata/ks_1033_data.csv',
                             column_types=tester)

# Question 1: What was the total cost to Kansas City area counties?


def _in_kansas_city_area(r):
    """Return True for rows from the four counties containing Kansas City."""
    return r['county'] in ('JACKSON', 'CLAY', 'CASS', 'PLATTE')


# Filter to counties containing Kansas City
kansas_city = table.where(_in_kansas_city_area)

# Sum total_cost of four counties
kansas_city_total = kansas_city.aggregate(agate.Sum('total_cost'))
print('Total for Kansas City area: %i' % kansas_city_total)

# Question 2: Which counties spent the most?

# Group by counties
counties = table.group_by('county')

# Aggregate totals for all counties, largest first
totals = counties.aggregate([('total_cost_sum', agate.Sum('total_cost'))])
totals = totals.order_by('total_cost_sum', reverse=True)

top_twenty = totals.limit(20)
top_twenty.print_bars('county', 'total_cost_sum', width=80)

print('Five most spendy counties:')

totals.print_table(5)