示例#1
0
def ky_by_candidate():
    """Write the ``candidate_contributions`` JS array to ``generated_js``.

    Rows of ``ky_candidate_contributions`` are split by whether their
    ``cmte_id`` is in ``current_candidate_cmte_ids``.  Each half is grouped
    by ``cand_nm``, reduced to a row count and a ``contb_receipt_amt`` sum,
    and written largest sum first.  Candidates outside the current list are
    only written when their sum exceeds 25000.
    """

    def totals_by_candidate(contributions):
        # One row per candidate (cand_nm, count, sum), largest sum first.
        return contributions.group_by('cand_nm').aggregate([
            ('contributions_count', agate.Count()),
            ('contributions_sum', agate.Sum('contb_receipt_amt'))
        ]).order_by('contributions_sum', reverse=True)

    generated_js.write('candidate_contributions = [')

    # Candidates whose committee is in the current list.
    still_running = totals_by_candidate(ky_candidate_contributions.where(
        lambda r: r['cmte_id'] in current_candidate_cmte_ids))
    for entry in still_running.rows:
        generated_js.write(''.join([
            '{name: "', entry[0], '", count: ', str(entry[1]),
            ', sum: ', str(entry[2]), ', status: "current"},'
        ]))

    # Everyone else, limited to those who raised more than 25000.
    no_longer_running = totals_by_candidate(ky_candidate_contributions.where(
        lambda r: r['cmte_id'] not in current_candidate_cmte_ids))
    notable_dropouts = no_longer_running.where(
        lambda r: r['contributions_sum'] > 25000)
    for entry in notable_dropouts.rows:
        generated_js.write(''.join([
            '{name: "', entry[0], '", count: ', str(entry[1]),
            ', sum: ', str(entry[2]), ', status: "dropped"},'
        ]))

    generated_js.write(']\n')
示例#2
0
def summarize():
    """Write one ``<region>-sums.csv`` comparing 1965 and 2015 column sums.

    Both yearly CSVs are loaded once.  For every ``(region, states,
    population)`` entry of ``REGIONS`` the tables are restricted to the
    region's states, and every column after the first two in ``COLUMNS`` is
    summed for each year.  Each output row carries the raw sums, the
    per-capita sums and the relative change of both.
    """
    raw_1965 = agate.Table.from_csv('processed-data/1965.csv', COLUMNS)
    raw_2015 = agate.Table.from_csv('processed-data/2015.csv', COLUMNS)

    def relative_change(before, after):
        return (after - before) / before

    for region, states, population in REGIONS:
        region_1965 = raw_1965.where(lambda row: row['state'] in states)
        region_2015 = raw_2015.where(lambda row: row['state'] in states)

        output = []

        for col_name, _ in COLUMNS[2:]:
            sum_1965 = region_1965.columns[col_name].aggregate(agate.Sum())
            per_capita_1965 = sum_1965 / population['1965']
            sum_2015 = region_2015.columns[col_name].aggregate(agate.Sum())
            # The 2015 figures are divided by the population keyed '2014'.
            per_capita_2015 = sum_2015 / population['2014']

            output.append(OrderedDict([
                ('var', col_name),
                ('1965', sum_1965),
                ('1965_per_capita', per_capita_1965),
                ('2015', sum_2015),
                ('2015_per_capita', per_capita_2015),
                ('absolute_percent_change',
                 relative_change(sum_1965, sum_2015)),
                ('per_capita_percent_change',
                 relative_change(per_capita_1965, per_capita_2015)),
            ]))

        dataset.freeze(output,
                       format='csv',
                       filename='processed-data/{0}-sums.csv'.format(region))
示例#3
0
def write_weighted_means_csv():
    """Write ``data/output/weighted_means.csv`` with one row per county type.

    For each of the rural, small-town and metro tables, every
    ``weighted_score_<age>yo_<income>k`` column is summed and divided by
    the table's summed ``Population``.  The result is stored under the
    matching ``weighted_mean_<age>yo_<income>k`` column.
    """
    column_names = ['county_type'] + [
        'weighted_mean_{0}yo_{1}k'.format(age, income)
        for age in ages
        for income in incomes
    ]
    # One text label column followed by one number column per age/income pair.
    column_types = [text_type] + [number_type] * (len(column_names) - 1)

    labelled_tables = (
        (rural_weighted, 'rural'),
        (small_towns_weighted, 'small_towns'),
        (metro_weighted, 'metro'),
    )

    rows = []
    for weighted_table, label in labelled_tables:
        total_population = weighted_table.aggregate(agate.Sum('Population'))
        means = [
            weighted_table.aggregate(
                agate.Sum('weighted_score_{0}yo_{1}k'.format(age, income))
            ) / total_population
            for age in ages
            for income in incomes
        ]
        rows.append([label] + means)

    agate.Table(rows, column_names,
                column_types).to_csv('data/output/weighted_means.csv')
示例#4
0
def year_sum_counts(data):
    """Store per-year totals under ``data['groupped_year']`` and return ``data``.

    ``data['table']`` is grouped by ``year``.  Each group yields the summed
    ``killed`` and ``injured`` columns, the row count as ``accidents``, and
    the result of the ``count_accidents_injured`` aggregation, which is
    defined outside this function.
    """
    by_year = data['table'].group_by('year')
    yearly_aggregations = [
        ('killed', agate.Sum('killed')),
        ('injured', agate.Sum('injured')),
        ('accidents', agate.Count()),
        ('accidents_injured', count_accidents_injured),
    ]
    data['groupped_year'] = by_year.aggregate(yearly_aggregations)
    return data
示例#5
0
def calculate_trump_pct(table):
    """Return Trump's vote count as a percentage of all summed vote counts.

    The denominator is the sum of the ``trump``, ``clinton``, ``johnson``,
    ``stein``, ``mcmullin`` and ``other`` ``*_votecount`` columns of
    ``table``.
    """
    trump_total = table.aggregate(agate.Sum('trump_votecount'))
    other_total = sum(
        table.aggregate(agate.Sum('{0}_votecount'.format(cand)))
        for cand in ('clinton', 'johnson', 'stein', 'mcmullin', 'other'))

    return (trump_total / (trump_total + other_total)) * 100
示例#6
0
def statistics(data):
    """Store whole-table summary figures under ``data['statistics']``.

    The figures are the sums of ``killed`` and ``injured``, the row count
    as ``accidents``, and the means of the ``accidents``, ``killed`` and
    ``injured`` columns of ``data['table']``.  Returns ``data``.
    """
    source_table = data['table']
    summary_aggregations = [
        ('killed', agate.Sum('killed')),
        ('injured', agate.Sum('injured')),
        ('accidents', agate.Count()),
        ('mean_accidents', agate.Mean('accidents')),
        ('mean_killed', agate.Mean('killed')),
        ('mean_injured', agate.Mean('injured')),
    ]
    data['statistics'] = source_table.aggregate(summary_aggregations)
    return data
示例#7
0
def year_police_beat_sum_counts(data):
    """Store per-year, per-police-beat totals and ranks in ``data``.

    ``data['table']`` is grouped by ``year`` and then ``police_beat``.
    Each group yields summed ``killed``/``injured`` and the row count as
    ``accidents``.  Four rank columns are then computed with
    ``RankWeightedAccidents`` and ``GroupRanking``, both defined outside
    this function.  The result is stored under ``data['year_police_beat']``
    and ``data`` is returned.
    """
    by_year_and_beat = data['table'].group_by('year').group_by('police_beat')
    beat_totals = by_year_and_beat.aggregate([
        ('killed', agate.Sum('killed')),
        ('injured', agate.Sum('injured')),
        ('accidents', agate.Count()),
    ])
    data['year_police_beat'] = beat_totals.compute([
        ('weighted_rank', RankWeightedAccidents('year')),
        ('killed_rank', GroupRanking('killed', 'year')),
        ('accidents_rank', GroupRanking('accidents', 'year')),
        ('injured_rank', GroupRanking('injured', 'year')),
    ])
    return data
示例#8
0
文件: prove.py 项目: Quartz/refugees
def subset(data):
    """Export yearly totals for the selected origin countries since 1980.

    Rows of ``data['table']`` whose ``origin`` is in ``SELECTED_COUNTRIES``
    and whose ``year`` is at least 1980 are grouped by a combined
    ``"<year>/<origin>"`` key, and every population column is summed.  The
    key is split back into ``year`` and ``origin`` text columns.  The result
    is written to ``subset.csv`` without the ``returned_refugees`` column.
    A year-by-origin pivot of ``total`` is written to ``subset_pivot.csv``.
    """
    selected_rows = data['table'].where(
        lambda r: r['origin'] in SELECTED_COUNTRIES and r['year'] >= 1980)
    by_year_and_origin = selected_rows.group_by(
        lambda r: '/'.join([str(r['year']), r['origin']]),
        key_name='year_and_origin')

    summed_columns = (
        'refugees', 'asylum_seekers', 'returned_refugees', 'idps',
        'returned_idps', 'stateless_persons', 'others', 'total',
    )
    refugees = by_year_and_origin.aggregate(
        [(name, agate.Sum(name)) for name in summed_columns])
    refugees = refugees.order_by('year_and_origin', reverse=True)

    def key_part(index):
        # Formula extracting one side of the "<year>/<origin>" group key.
        return agate.Formula(
            agate.Text(), lambda r: r['year_and_origin'].split('/')[index])

    refugees = refugees.compute([
        ('year', key_part(0)),
        ('origin', key_part(1)),
    ])

    refugees = refugees.select([
        'origin', 'year', 'refugees', 'asylum_seekers', 'idps',
        'returned_idps', 'stateless_persons', 'others', 'total'
    ])

    refugees.to_csv('subset.csv')

    pivoted = refugees.pivot('year', 'origin', agate.Sum('total'))
    pivoted.order_by('year').to_csv('subset_pivot.csv')
示例#9
0
文件: prove.py 项目: Quartz/refugees
def count_years(data):
    """Print yearly refugee sums and write yearly ``total`` sums to a CSV.

    Both figures come from ``data['by_year']`` and are ordered by ``year``.
    The ``refugees`` sums are printed as a table; the ``total`` sums are
    saved to ``years.csv``.
    """
    by_year = data['by_year']

    refugees = by_year.aggregate([
        ('total_refugees', agate.Sum('refugees')),
    ]).order_by('year')
    refugees.print_table()

    total = by_year.aggregate([
        ('total_total', agate.Sum('total')),
    ]).order_by('year')
    total.to_csv('years.csv')
示例#10
0
def sum_counts_by_hour(data):
    """Store per-hour totals, percentages and deviation columns in ``data``.

    The table is built in three steps:
      1. group ``data['table']`` by ``hour`` and aggregate killed, injured
         and accident counts, plus ``count_accidents_injured`` (defined
         outside this function);
      2. add each total's percentage of its column;
      3. add ``weighted`` (killed percent + injured percent) and three
         ``StandardDeviations(..., 0.5)`` columns (class defined outside
         this function).

    The result is stored under ``data['hour']`` and ``data`` is returned.
    """
    hourly_totals = data['table'].group_by('hour').aggregate([
        ('killed', agate.Sum('killed')),
        ('injured', agate.Sum('injured')),
        ('accidents', agate.Count()),
        ('accidents_injured', count_accidents_injured),
    ])

    with_percentages = hourly_totals.compute([
        ('killed_percent', agate.Percent('killed')),
        ('injured_percent', agate.Percent('injured')),
        ('accidents_percent', agate.Percent('accidents')),
    ])

    data['hour'] = with_percentages.compute([
        ('weighted',
         agate.Formula(agate.Number(),
                       lambda r: r['killed_percent'] + r['injured_percent'])),
        ('accidents_within_half_deviation',
         StandardDeviations('accidents', 0.5)),
        ('killed_within_half_deviation', StandardDeviations('killed', 0.5)),
        ('injured_within_half_deviation', StandardDeviations('injured', 0.5)),
    ])
    return data
示例#11
0
文件: prove.py 项目: Quartz/refugees
def graphic(data):
    """Dump per-country and overall yearly totals to ``src/data/refugees.json``.

    ``data['grouped']`` is set to the ``total`` sums per origin and year,
    with rows named ``"<origin>-<year>"``.  For every country in
    ``SELECTED_COUNTRIES`` a list is built with one value per year from
    ``FIRST_YEAR`` up to and including 2014 (``None`` where no row exists),
    followed by the country's ``MID_YEAR_2015`` value.  A ``'total'`` series
    is built the same way from all rows, except that a missing year raises
    ``KeyError`` instead of yielding ``None``.
    """
    data['grouped'] = (
        data['table']
        .group_by('origin')
        .group_by('year')
        .aggregate([('total', agate.Sum('total'))])
        .rename(row_names=lambda r: '%(origin)s-%(year)s' % r))

    charted_years = range(FIRST_YEAR, 2015)

    def total_or_none(country, year):
        # Missing origin/year combinations are recorded as None.
        try:
            return data['grouped'].rows['%s-%s' % (country, year)]['total']
        except KeyError:
            return None

    countries = {}
    for country in SELECTED_COUNTRIES:
        series = [total_or_none(country, year) for year in charted_years]
        series.append(MID_YEAR_2015[country])
        countries[country] = series

    totals = (
        data['table']
        .group_by('year')
        .aggregate([('total', agate.Sum('total'))])
        .rename(row_names=lambda r: str(r['year'])))

    overall = [totals.rows[str(year)]['total'] for year in charted_years]
    overall.append(MID_YEAR_2015['total'])
    countries['total'] = overall

    with open('src/data/refugees.json', 'w') as f:
        json.dump(countries, f, cls=DecimalEncoder)
示例#12
0
文件: prove.py 项目: Quartz/refugees
def worst_country_year(data):
    """Print the 30 origin/year combinations with the largest ``total``.

    Rows of ``data['table']`` are grouped by an ``"<origin> / <year>"`` key,
    every population column is summed, and the result is ordered by
    ``total`` descending before printing.
    """
    country_year = data['table'].group_by(
        lambda r: ' / '.join([r['origin'], str(r['year'])]),
        key_name='origin_and_year')

    summed_columns = (
        'refugees', 'asylum_seekers', 'returned_refugees', 'idps',
        'returned_idps', 'stateless_persons', 'others', 'total',
    )
    refugees = country_year.aggregate(
        [(name, agate.Sum(name)) for name in summed_columns])
    refugees = refugees.order_by('total', reverse=True)

    refugees.print_table(30)
示例#13
0
文件: prove.py 项目: Quartz/refugees
def to_and_from(data):
    """Write ``joined.csv`` comparing refugees leaving and hosted per country.

    For each year, ``refugees`` is summed once per ``origin`` and once per
    ``residence``.  The two results are joined where year and country
    match.  A ``ratio`` column is added as ``1 - |origin - residence| /
    (origin + residence)``, or ``None`` when either side is empty or zero.
    """
    by_year = data['table'].select(
        ['origin', 'residence', 'year', 'refugees']).group_by('year')

    def refugees_by(column, total_name):
        # Per-year sums of ``refugees`` for each distinct value of ``column``.
        return by_year.group_by(column).aggregate([
            (total_name, agate.Sum('refugees'))
        ])

    by_origin = refugees_by('origin', 'origin_refugees')
    by_residence = refugees_by('residence', 'residence_refugees')

    def balance_ratio(r):
        origin = r['origin_refugees']
        residence = r['residence_refugees']

        if not origin or not residence:
            return None

        return 1 - (abs(origin - residence) / (origin + residence))

    joined = by_origin.join(by_residence,
                            lambda r: (r['year'], r['origin']),
                            lambda r: (r['year'], r['residence']))
    # Drop the right-hand key columns left over from the join.
    joined = joined.exclude(['residence', 'year2'])
    joined = joined.rename(column_names={'origin': 'country'})
    joined = joined.compute([
        ('ratio', agate.Formula(agate.Number(), balance_ratio))
    ])

    joined.to_csv('joined.csv')
示例#14
0
def print_ky_current_candidate_sum():
    """Print how much the listed current candidates have received.

    Only rows of ``ky_candidate_contributions`` whose ``cmte_id`` is in
    ``current_candidate_cmte_ids`` are counted.  The reported candidate
    count is the length of that id list.
    """
    current_cand_ky_contrib = ky_candidate_contributions.where(
        lambda r: r['cmte_id'] in current_candidate_cmte_ids)

    donation_count = current_cand_ky_contrib.aggregate(agate.Count())
    donation_sum = current_cand_ky_contrib.aggregate(
        agate.Sum('contb_receipt_amt'))
    candidate_count = str(len(current_candidate_cmte_ids))

    print(''.join([
        'There are currently ', candidate_count,
        ' candidates running for president. Those ', candidate_count,
        ' candidates have received ', str(donation_count),
        ' donations totaling $', str(donation_sum),
    ]))
示例#15
0
def print_ky_overall_summary():
    """Print and export the overall count and sum of ``ky_all_contributions``.

    The sum is taken over ``TRANSACTION_AMT``.  Both figures are printed
    and then written to ``generated_js`` as ``total_donated_sum`` and
    ``total_donated_count``.
    """
    # Total amount donated.
    ky_contrib_sum = ky_all_contributions.aggregate(
        agate.Sum('TRANSACTION_AMT'))
    # Number of individual contributions.
    ky_contrib_count = ky_all_contributions.aggregate(agate.Count())

    sum_text = str(ky_contrib_sum)
    count_text = str(ky_contrib_count)

    print(''.join([
        count_text, ' donations, totaling $', sum_text,
        ' have been donated by Kentuckians to the 2016 presidential race.',
    ]))

    generated_js.write(''.join([
        'total_donated_sum = ', sum_text,
        '\ntotal_donated_count = ', count_text,
        '\n',
    ]))
示例#16
0
def top_ky_donors_candidates():
    """Print all donor totals and export the five largest to ``generated_js``.

    ``ky_candidate_contributions`` is grouped by ``contbr_nm``.  Each donor
    gets a contribution count and a ``contb_receipt_amt`` sum, ordered by
    sum descending.  The first five rows are written as the
    ``top_donors_to_candidates`` JS array.
    """
    sorted_contributor_totals = (
        ky_candidate_contributions
        .group_by('contbr_nm')
        .aggregate([
            ('contributions_count', agate.Count()),
            ('contributions_sum', agate.Sum('contb_receipt_amt'))
        ])
        .order_by('contributions_sum', reverse=True))

    sorted_contributor_totals.print_table()

    generated_js.write('top_donors_to_candidates = [')
    for donor in itertools.islice(sorted_contributor_totals.rows, 5):
        generated_js.write(''.join([
            '{name: "', donor[0], '", count: ', str(donor[1]),
            ', sum: ', str(donor[2]), '},'
        ]))
    generated_js.write(']\n')
示例#17
0
def top_ky_donors_pac():
    """Print all donor totals and export the five largest to ``generated_js``.

    ``ky_all_contributions`` is grouped by ``NAME``.  Each donor gets a
    contribution count and a ``TRANSACTION_AMT`` sum, ordered by sum
    descending.  The first five rows are written as the
    ``top_donors_to_pacs`` JS array.
    """
    sorted_contributor_totals = (
        ky_all_contributions
        .group_by('NAME')
        .aggregate([
            ('contributions_count', agate.Count()),
            ('contributions_sum', agate.Sum('TRANSACTION_AMT'))
        ])
        .order_by('contributions_sum', reverse=True))

    sorted_contributor_totals.print_table()

    generated_js.write('top_donors_to_pacs = [')
    for donor in itertools.islice(sorted_contributor_totals.rows, 5):
        generated_js.write(''.join([
            '{name: "', donor[0], '", count: ', str(donor[1]),
            ', sum: ', str(donor[2]), '},'
        ]))
    generated_js.write(']\n')
示例#18
0
def print_ky_candidate_summary():
    """Print and export the count and sum of ``ky_candidate_contributions``.

    The sum is taken over ``contb_receipt_amt``.  Both figures are printed
    and then written to ``generated_js`` as ``total_candidate_donated_sum``
    and ``total_candidate_donated_count``.
    """
    # Total amount donated to candidates.
    ky_candidate_sum = ky_candidate_contributions.aggregate(
        agate.Sum('contb_receipt_amt'))
    # Number of individual contributions to candidates.
    ky_candidate_count = ky_candidate_contributions.aggregate(agate.Count())

    sum_text = str(ky_candidate_sum)
    count_text = str(ky_candidate_count)

    print(''.join([
        count_text, ' donations, totaling $', sum_text,
        ' have been donated by Kentuckians specifically to the 2016 presidential candidates.',
    ]))

    generated_js.write(''.join([
        'total_candidate_donated_sum = ', sum_text,
        '\ntotal_candidate_donated_count = ', count_text, '\n',
    ]))
示例#19
0
def organisations(event, context):
    """Export all organisations and a small summary to S3.

    The table from ``get_all_orgs()`` is written to a temporary CSV and
    uploaded as ``open_data_cities.csv``.  The row count and the sum of the
    ``datasets`` column are then written to a temporary JSON file and
    uploaded as ``open_data_cities_summary.json``.

    Args:
        event: Invocation payload; echoed back in the response body.
        context: Unused.

    Returns:
        A dict with ``statusCode`` 200 and a JSON-encoded ``body``.
    """
    # Local import keeps this block self-contained.
    from decimal import Decimal

    def _json_default(value):
        # agate number aggregations (e.g. Sum) produce Decimal values, which
        # the json module cannot serialise on its own.
        if isinstance(value, Decimal):
            if value.is_finite() and value == value.to_integral_value():
                return int(value)
            return float(value)
        raise TypeError(
            'Object of type %s is not JSON serializable' % type(value).__name__)

    table = get_all_orgs()
    table.to_csv('/tmp/open_data_germany.csv')
    upload_file_to_s3('open_data_cities.csv', '/tmp/open_data_germany.csv')

    aggregates = table.aggregate([
        ('count', agate.Count()),
        ('sum', agate.Sum('datasets'))
    ])
    with open('/tmp/summary.json', 'w') as f:
        json.dump(aggregates, f, default=_json_default)
    upload_file_to_s3('open_data_cities_summary.json', '/tmp/summary.json')

    body = {
        "message": "Go Serverless v1.0! Your function executed successfully!",
        "input": event
    }

    response = {
        "statusCode": 200,
        "body": json.dumps(body)
    }

    return response
def year_police_beat_sum_counts(data):
    """Store per-year, per-police-beat totals under ``data['year_police_beat']``.

    Each (year, police_beat) group yields summed ``killed``/``injured`` and
    the row count as ``accidents``.  ``data`` is modified in place; nothing
    is returned.
    """
    by_year_and_beat = data['table'].group_by('year').group_by('police_beat')
    data['year_police_beat'] = by_year_and_beat.aggregate([
        ('killed', agate.Sum('killed')),
        ('injured', agate.Sum('injured')),
        ('accidents', agate.Count()),
    ])
def sum_counts_by_full_hour(data):
    """Store totals per ``date_hour`` value under ``data['full_hour']``.

    Each group yields summed ``killed``/``injured`` and the row count as
    ``accidents``.  ``data`` is modified in place; nothing is returned.
    """
    by_date_hour = data['table'].group_by('date_hour')
    data['full_hour'] = by_date_hour.aggregate([
        ('killed', agate.Sum('killed')),
        ('injured', agate.Sum('injured')),
        ('accidents', agate.Count()),
    ])
示例#22
0
import agate

# Read 'fips' as text instead of letting the tester infer its type.
tester = agate.TypeTester(force={'fips': agate.Text()})

table = agate.Table.from_csv(
    'examples/realdata/ks_1033_data.csv', column_types=tester)

# Question 1: What was the total cost to Kansas City area counties?

# Keep only the four counties that contain Kansas City.
kansas_city = table.where(
    lambda r: r['county'] in ('JACKSON', 'CLAY', 'CASS', 'PLATTE'))

# Sum total_cost over those counties.
kansas_city_total = kansas_city.columns['total_cost'].aggregate(agate.Sum())
print('Total for Kansas City area: %i' % kansas_city_total)

# Question 2: Which counties spent the most?

# One group per county.
counties = table.group_by('county')

# Total cost per county, most expensive first.
totals = counties.aggregate([
    ('total_cost', agate.Sum(), 'total_cost_sum')
]).order_by('total_cost_sum', reverse=True)

top_twenty = totals.limit(20)
top_twenty.print_bars('county', 'total_cost_sum', width=80)

print('Five most spendy counties:')

totals.print_table(5)
示例#23
0
# Parse the ' Date' column (note the leading space) as a %Y-%m-%d date.
tester = agate.TypeTester(force={' Date': agate.Date('%Y-%m-%d')})

emissions = agate.Table.from_csv('examples/epa-emissions-20150910.csv', tester)


def _or_zero(column_name):
    """Return a row function reading ``column_name``, using 0 when it is falsy."""
    return lambda r: r[column_name] or 0


def _pollutant_sums():
    """Return a fresh list of sum aggregations for so2, co2 and noX."""
    return [(name, agate.Sum(), name) for name in ('so2', 'co2', 'noX')]


# Add the day of month plus null-safe copies of the three emission columns.
emissions = emissions.compute([
    (agate.Formula(agate.Number(), lambda r: r[' Date'].day), 'day'),
    (agate.Formula(agate.Number(), _or_zero(' SO2 (tons)')), 'so2'),
    (agate.Formula(agate.Number(), _or_zero(' NOx (tons)')), 'noX'),
    (agate.Formula(agate.Number(), _or_zero(' CO2 (short tons)')), 'co2'),
])

# Totals per state.
states = emissions.group_by('State')
state_totals = states.aggregate(_pollutant_sums())

new_york = states['NY']

# Totals per day of month.
# NB: key_type shouldn't be necessary--agate bug #234
days = emissions.group_by('day', key_type=agate.Number())
day_totals = days.aggregate(_pollutant_sums())

dates = emissions.group_by(' Date', key_type=agate.Date('%Y-%m-%d'))
示例#24
0
# pref_corr.print_table()

# first assign block_forced courses                                                      TODO

# generate a table with a column of blocknumbers
# (one single-cell row per block number, 1 through n_blocks inclusive)
blockarray = []
for b in range(1, n_blocks + 1):
    blockarray.append([b])
blocksumtable_empty = agate.Table(blockarray, ['block'], [agate.Number()])

# put every course in a block
for course in courses:
    #print('course: '+course)
    # Step 1: join pref_corr to courselist (pref_corr.course2 ==
    # courselist.course), keep the rows for the current course, sum 'corr'
    # per block, and drop the row whose block is null.
    # NOTE(review): 'block' presumably comes from courselist and is null for
    # courses not yet placed -- confirm against how courselist is built.
    block_corr_sum = pref_corr.join(courselist,'course2','course') \
        .where(lambda row : row['course'] == course) \
        .pivot('block', aggregation=agate.Sum('corr')) \
        .where(lambda row : row['block'] != None)
    # Step 2: join onto the full block list, re-sum the 'Sum' column per
    # block, attach the per-block row count of courselist ('Count'), and
    # sort ascending by (Sum, Count).
    # NOTE(review): 'Sum' and 'Count' are assumed to be the column names
    # pivot() produces by default -- confirm for the agate version in use.
    block_corr_sum = blocksumtable_empty.join(block_corr_sum,'block','block') \
        .pivot('block', aggregation=agate.Sum('Sum')) \
        .join(courselist.pivot('block'),'block','block') \
        .order_by(lambda r: (r['Sum'], r['Count']))
    # block_corr_sum.print_table()
    # check the repeats
    # Block numbers in the order established above.
    bestblocks = block_corr_sum.columns['block']
    # Requested repeat count for this course (first matching shortlist row),
    # capped at the number of blocks.
    repeats = shortlist.where(
        lambda r: r['course'] == course).columns['repeats'][0]
    if repeats > n_blocks:
        repeats = n_blocks
    # Look up the courselist id of each instance (1..repeats) of this course.
    # NOTE(review): bestblocks and courseid are not used further in this
    # excerpt; the assignment step presumably follows.
    for instance in range(1, repeats + 1):
        courseid = courselist.where(lambda r: (r['course'] == course) and (r[
            'instance'] == instance)).columns['id'][0]
示例#25
0
import agate

# Read 'fips' as text instead of letting the tester infer its type.
tester = agate.TypeTester(force={'fips': agate.Text()})

table = agate.Table.from_csv(
    'examples/realdata/ks_1033_data.csv', column_types=tester)

# Question 1: What was the total cost to Kansas City area counties?

# Keep only the four counties that contain Kansas City.
kansas_city = table.where(
    lambda r: r['county'] in ('JACKSON', 'CLAY', 'CASS', 'PLATTE'))

# Sum total_cost over those counties.
kansas_city_total = kansas_city.aggregate(agate.Sum('total_cost'))
print('Total for Kansas City area: %i' % kansas_city_total)

# Question 2: Which counties spent the most?

# One group per county.
counties = table.group_by('county')

# Total cost per county, most expensive first.
totals = counties.aggregate([
    ('total_cost_sum', agate.Sum('total_cost'))
]).order_by('total_cost_sum', reverse=True)

top_twenty = totals.limit(20)
top_twenty.print_bars('county', 'total_cost_sum', width=80)

print('Five most spendy counties:')
示例#26
0
def print_contributions_by_cmte_type():
    """Print and export contribution totals for each party's committees.

    Rows of ``ky_candidate_contributions`` are matched against two
    hard-coded lists of committee ids.  For each party the contribution
    count and ``contb_receipt_amt`` sum are printed, and the sum is written
    to ``generated_js`` as ``to_republicans`` / ``to_democrats``.

    The id lists are hard-coded.  An earlier, disabled version derived them
    from ``cmte_list`` by filtering ``CMTE_PTY_AFFILIATION`` on 'REP' and
    'DEM' and collecting ``CMTE_ID``.
    """
    rep_cmte_id_list = [
        'C00579458',  # Jeb Bush
        'C00573519',  # Carson
        'C00580399',  # Christie
        'C00574624',  # Cruz
        'C00577312',  # Fiorino
        'C00578757',  # Graham
        'C00577981',  # Huckabee
        'C00581876',  # Kasich
        'C00575449',  # Paul
        'C00458844',  # Rubio
        'C00578492',  # Santorum
        'C00580100',  # Trump
        'C00580480',  # Walker
    ]

    dem_cmte_id_list = [
        'C00575795',  # Clinton
        'C00583146',  # Lessig
        'C00578658',  # OMalley
        'C00577130',  # Sanders
        'C00581215',  # Webb
    ]

    # Split the individual contributions by the party of the receiving
    # committee.
    rep_contributions = ky_candidate_contributions.where(
        lambda r: r['cmte_id'] in rep_cmte_id_list)
    dem_contributions = ky_candidate_contributions.where(
        lambda r: r['cmte_id'] in dem_cmte_id_list)

    reports = (
        (rep_contributions,
         ' contributions to Republican committees, totaling $',
         'to_republicans = '),
        (dem_contributions,
         ' contributions to Democratic committees, totaling $',
         'to_democrats = '),
    )
    for contributions, print_label, js_prefix in reports:
        contrib_count = contributions.aggregate(agate.Count())
        contrib_sum = contributions.aggregate(
            agate.Sum('contb_receipt_amt'))
        print(str(contrib_count) + print_label + str(contrib_sum))
        generated_js.write(js_prefix + str(contrib_sum) + '\n')
示例#27
0
def candidate_time_charts():
    """Regenerate ``app/data/candidate_charts.js`` with monthly chart series.

    The file receives:
      * ``count_labels`` -- the distinct ``month_year`` labels (last six
        characters of ``contb_receipt_dt``) of contributions dated after
        2015-02-28, in date order;
      * for every candidate, ``<label>_count_series`` and
        ``<label>_sum_series`` -- one ``{'meta', 'value'}`` entry per month
        label holding the contribution count and the ``contb_receipt_amt``
        sum as strings, or ``'0'`` when the candidate has no contributions
        for that month.  ``<label>`` is the lower-cased part of ``cand_nm``
        before the first comma.

    The file is opened in write mode, so it is created when missing and
    replaced when present.  (The previous remove-then-append sequence
    raised when the file did not exist yet.)  The handle is closed even if
    writing fails.
    """
    text_type = agate.Text()

    candidate_contribs_with_monthyear = ky_candidate_contributions.compute([
        ('month_year',
         agate.Formula(text_type, lambda r: r['contb_receipt_dt'][-6:])),
        # NOTE(review): declared as text although strptime returns a
        # datetime; the string comparison below relies on the value being
        # stored as 'YYYY-MM-DD HH:MM:SS' text -- confirm for the agate
        # version in use.
        ('date',
         agate.Formula(
             text_type, lambda r: datetime.datetime.strptime(
                 r['contb_receipt_dt'], '%d-%b-%y')))
    ])

    restricted_date_candidate_contribs = (
        candidate_contribs_with_monthyear
        .order_by('date')
        .where(lambda r: r['date'] > '2015-02-28 00:00:00'))

    # Chart labels: distinct month_year values, in date order.
    month_years = []
    for row in restricted_date_candidate_contribs.rows:
        month_year = str(row['month_year'])
        if month_year not in month_years:
            month_years.append(month_year)

    # Distinct candidates, in first-seen order (all dates, not just the
    # restricted range).
    candidates = []
    for row in candidate_contribs_with_monthyear.rows:
        if row['cand_nm'] not in candidates:
            candidates.append(row['cand_nm'])

    # Count and sum per (candidate, month_year).
    month_year_counts = (
        candidate_contribs_with_monthyear
        .group_by('cand_nm')
        .group_by(lambda r: r['month_year'], key_name='month_year_group')
        .aggregate([
            ('contribution_count', agate.Count()),
            ('contribution_sum', agate.Sum('contb_receipt_amt'))
        ]))

    # Index the aggregated rows once instead of rescanning them for every
    # candidate/month combination.
    totals_by_key = {}
    for row in month_year_counts.rows:
        totals_by_key[(row['cand_nm'], row['month_year_group'])] = row

    with open('app/data/candidate_charts.js', 'w') as chart_js:
        chart_js.write('count_labels = ' + str(month_years) + '\n')

        for candidate in candidates:
            series_label = candidate.split(',')[0].lower()
            count_value_list = []
            sum_value_list = []

            for month in month_years:
                row = totals_by_key.get((candidate, month))
                if row is None:
                    # No contributions that month: record explicit zeros.
                    count_value = '0'
                    sum_value = '0'
                    # The zero entry has always used the long wording;
                    # kept so the generated file is unchanged.
                    sum_meta_prefix = 'Amount contributed to '
                else:
                    count_value = str(row['contribution_count'])
                    sum_value = str(row['contribution_sum'])
                    sum_meta_prefix = 'Amt. contributed to '

                count_value_list.append({
                    'meta': 'Contributions to ' + candidate + ' for ' + month,
                    'value': count_value,
                })
                sum_value_list.append({
                    'meta': sum_meta_prefix + candidate + ' for ' + month,
                    'value': sum_value,
                })

            chart_js.write(series_label + '_count_series = ' +
                           str(count_value_list) + '\n')
            chart_js.write(series_label + '_sum_series = ' +
                           str(sum_value_list) + '\n')
示例#28
0
文件: prove.py 项目: Quartz/refugees
def count_origins(data):
    """Print the 20 groups of ``data['by_origin_2014']`` with most refugees.

    ``refugees`` is summed per group as ``total_refugees`` and the result is
    ordered by that sum, largest first.
    """
    refugee_totals = data['by_origin_2014'].aggregate([
        ('total_refugees', agate.Sum('refugees'))
    ])
    ranked = refugee_totals.order_by('total_refugees', reverse=True)

    ranked.print_table(20)
示例#29
0
# find total minutes played
# Keep one row per distinct quarter_id, then only its 'quarter' column.
quarters_seen_once = data_with_quarter.distinct('quarter_id')
unique_quarters = quarters_seen_once.select(['quarter'])


def getMinutes(row):
    """Return 2 when ``row['quarter']`` equals 4, otherwise 5."""
    return 2 if row['quarter'] == 4 else 5


# Give every distinct quarter the number of minutes getMinutes assigns it.
minutes_formula = agate.Formula(agate.Number(), getMinutes)
quarter_minutes = unique_quarters.compute([
    ('minutes_played', minutes_formula)
])

# Total minutes across all quarters, then incorrect calls per minute.
total_minutes = quarter_minutes.aggregate(agate.Sum('minutes_played'))
minutes_played = int(total_minutes)
num_calls_incorrect_per_minute = (
    float(num_calls_incorrect) / float(minutes_played))

# export summary data

summary_column_names = ['key', 'value']
summary_column_types = [agate.Text(), agate.Text()]

summary_rows = [
    ('num_games', format(num_games, ',d')),
    ('num_calls', format(num_calls, ',d')),
    ('num_calls_incorrect', format(num_calls_incorrect, ',d')),
    ('minutes_played', format(minutes_played, ',d')),
    ('percent_incorrect', format(percent_incorrect, '.1f')),
    ('num_calls_incorrect_per_game', format(num_calls_incorrect_per_game,
示例#30
0
文件: example.py 项目: ejmurra/agate
import agate

# Read 'fips' as text instead of letting the tester infer its type.
tester = agate.TypeTester(force={'fips': agate.Text()})

table = agate.Table.from_csv(
    'examples/realdata/ks_1033_data.csv', column_types=tester)

# Question 1: What was the total cost to Kansas City area counties?

# Keep only the four counties that contain Kansas City.
kansas_city = table.where(
    lambda r: r['county'] in ('JACKSON', 'CLAY', 'CASS', 'PLATTE'))

# Sum total_cost over those counties.
kansas_city_total = kansas_city.aggregate(agate.Sum('total_cost'))
print('Total for Kansas City area: %i' % kansas_city_total)

# Question 2: Which counties spent the most?

# One group per county.
counties = table.group_by('county')

# Total cost per county, most expensive first.
totals = counties.aggregate([
    ('total_cost_sum', agate.Sum('total_cost'))
]).order_by('total_cost_sum', reverse=True)

top_twenty = totals.limit(20)
top_twenty.print_bars('county', 'total_cost_sum', width=80)

print('Five most spendy counties:')

totals.print_table(5)