Пример #1
0
def race_and_age(data):
    """Print median years in prison broken down by race and age decade.

    Takes the ``'with_years_in_prison'`` table from ``data``, keeps only rows
    whose ``age`` is not ``None``, groups them by ``race`` and then by age
    decade, and prints (``max_rows=10``) the per-group row count and median of
    ``years_in_prison``, ordered by that median in reverse.

    Args:
        data: Mapping that holds an agate table under
            ``'with_years_in_prison'``.
    """
    def has_age(row):
        return row['age'] is not None

    def decade_label(row):
        # Floor-divide the age by ten and append "0s", e.g. 34 -> '30s'.
        return '%i0s' % (row['age'] // 10)

    grouped = (
        data['with_years_in_prison']
        .where(has_age)
        .group_by('race')
        .group_by(decade_label, key_name='age_group')
    )

    summary = grouped.aggregate([
        ('count', agate.Count()),
        ('median_years_in_prison', agate.Median('years_in_prison')),
    ])

    ranked = summary.order_by('median_years_in_prison', reverse=True)
    ranked.print_table(max_rows=10)
Пример #2
0
 def test_computing_new_columns(self):
     """Compute a new column and print its median.

     Loads the exonerations CSV, adds a ``years_in_prison`` column computed
     with ``agate.Change('convicted', 'exonerated')``, and prints the median
     of that new column.
     """
     csv_path = '../../../data/exonerations-20150828.csv'
     table = agate.Table.from_csv(csv_path)

     computations = [
         ('years_in_prison', agate.Change('convicted', 'exonerated')),
     ]
     enriched = table.compute(computations)

     median = enriched.aggregate(agate.Median('years_in_prison'))
     print(median)
Пример #3
0
def median_age(data):
    """Print the median age and three bar charts of the age column.

    Works on the ``'exonerations'`` table in ``data``: prints the median of
    ``age``, then bar charts for explicitly binned ages, for a pivot on
    ``age`` ordered by ``age``, and for default binning.

    Args:
        data: Mapping that holds an agate table under ``'exonerations'``.
    """
    exonerations = data['exonerations']

    age_median = exonerations.aggregate(agate.Median('age'))
    print('Median age at time of arrest: %i' % age_median)

    # Binning with explicit arguments (10, 0, 100).
    explicit_bins = exonerations.bins('age', 10, 0, 100)
    explicit_bins.print_bars('age', width=80)

    # One bar per distinct pivot key, ordered by age.
    per_age = exonerations.pivot('age').order_by('age')
    per_age.print_bars('age', width=80)

    # Binning with the library defaults.
    default_bins = exonerations.bins('age')
    default_bins.print_bars('age', width=80)
Пример #4
0
def states(data):
    """Print states ranked by median years in prison.

    Groups the ``'with_years_in_prison'`` table by ``state``, aggregates a
    median into ``median_years_in_prison``, orders by it in reverse, and
    prints the result of ``format(max_rows=5)``.

    Args:
        data: Mapping that holds an agate table under
            ``'with_years_in_prison'``.
    """
    by_state = data['with_years_in_prison'].group_by('state')

    # NOTE(review): the three-element aggregate spec and Table.format() look
    # like an early agate API -- confirm against the pinned agate version.
    median_spec = ('years_in_prison', agate.Median(),
                   'median_years_in_prison')
    ranked = by_state.aggregate([median_spec]).order_by(
        'median_years_in_prison', reverse=True)

    print(ranked.format(max_rows=5))
Пример #5
0
def states(data):
    """Print two per-state summaries of the prison-years table.

    First prints states ordered by row count (reverse order), then states
    ordered by median ``years_in_prison`` (reverse order); each table is
    printed with ``max_rows=5``.

    Args:
        data: Mapping that holds an agate table under
            ``'with_years_in_prison'``.
    """
    grouped = data['with_years_in_prison'].group_by('state')

    # Table 1: row count per state.
    counts = grouped.aggregate([('count', agate.Count())])
    counts.order_by('count', reverse=True).print_table(max_rows=5)

    # Table 2: row count and median years in prison per state.
    count_and_median = [
        ('count', agate.Count()),
        ('median_years_in_prison', agate.Median('years_in_prison')),
    ]
    per_state = grouped.aggregate(count_and_median)
    per_state.order_by(
        'median_years_in_prison', reverse=True
    ).print_table(max_rows=5)
Пример #6
0
    country_json = json.load(f)

# Build a lookup from each JSON entry's 'name' to its 'parent'.
country_dict = {}
for dct in country_json:
    country_dict[dct['name']] = dct['parent']

# Add a text column 'continent' computed per row by get_country.
# NOTE(review): get_country is defined outside this excerpt; presumably it
# resolves a row's country through country_dict -- confirm.
cpi_and_cl = cpi_and_cl.compute([('continent', agate.Formula(agate.Text(), get_country))])

grp_by_cont = cpi_and_cl.group_by('continent')
print(grp_by_cont)

# Print each group key together with the number of rows in its table.
for cont, table in grp_by_cont.items():
    print(cont, len(table.rows))

# Inspecting the output by eye shows that Africa and Asia have high values,
# but that alone is not an easy way to approach the data.
# This is where aggregation methods come in.
# Compare how the continents differ with respect to perceived government
# corruption and child labor.
agg = grp_by_cont.aggregate([
    ('cl_mean', agate.Mean('Total (%)')),
    ('cl_max', agate.Max('Total (%)')),
    ('cpi_median', agate.Median('CPI 2013 Score')),
    ('cpi_min', agate.Min('CPI 2013 Score'))
])
agg.print_table()
print()
agg.print_bars('continent', 'cl_max')

# Pickle the table (including the new 'continent' column) next to this script.
with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'cpi_and_cl_2.pickle'), 'wb') as f:
    pickle.dump(cpi_and_cl, f)
Пример #7
0
import agate

# Optional: force a column's type when loading.
# tester = agate.TypeTester(force={'通道': agate.Boolean()})
purchases = agate.Table.from_csv('data.csv', encoding='GBK')
print(purchases)
# Two ways to view column names and data types: print the table (above) or
# call print_structure:
# agate.Table.print_structure(purchases)
print(purchases.columns['通道'])
exonerations = agate.Table.from_csv('20.csv',
                                    row_names=lambda r: '%(电池名)s' % (r))  # name each row after its '电池名' value

# Rows can be looked up by position or by the row name assigned above.
print(exonerations.rows[0])
print(exonerations.rows['M45182110100D3'])
for row in exonerations.rows[:5]:
    print(row['电池名'])
median = exonerations.aggregate(agate.Median('通道'))
print(median)
with_age = exonerations.where(lambda row: row['通道'] is not None
                              )  # where() keeps every row the lambda accepts and returns them as a new Table
for row in exonerations.rows[:5]:
    print(row[:])
# Number of rows dropped by the filter above.
print(len(exonerations.rows) - len(with_age.rows))
# NOTE(review): 'convicted' / 'exonerated' are the column names used here;
# confirm that '20.csv' actually contains them.
with_years_in_prison = exonerations.compute([
    ('years_in_prison', agate.Change('convicted', 'exonerated'))
])

# NOTE(review): the aggregate result is not stored or printed.
with_years_in_prison.aggregate(agate.Median('years_in_prison'))
#print(with_age)
#by_county = purchases.group_by('电池名')
'''totals = by_county.aggregate([
    ('county_cost', agate.Sum('total_cost'))
])
Пример #8
0
def median_age(data):
    """Print the median of the non-null ages in the exonerations table.

    Filters the ``'exonerations'`` table in ``data`` to rows whose ``age`` is
    not ``None``, aggregates the ``age`` column with ``agate.Median()``, and
    prints the result.

    Args:
        data: Mapping that holds an agate table under ``'exonerations'``.
    """
    def age_known(row):
        return row['age'] is not None

    ages = data['exonerations'].where(age_known).columns['age']
    result = ages.aggregate(agate.Median())

    print('Median age at time of arrest: %i' % result)
Пример #9
0

if __name__ == '__main__':
    # Build the agate table from freshly generated rows; column names and
    # types come from module-level definitions.
    data_lists = generate_test_data()
    tbl = agate.Table(
        data_lists,
        column_names=column_names,
        column_types=column_types,
    )

    # Summarise per 'pb' group: row count plus min / max / median salary.
    by_payband = tbl.group_by('pb')
    summary_tbl = by_payband.aggregate([
        ('count', agate.Count()),
        ('sal_min', agate.Min('salary')),
        ('sal_max', agate.Max('salary')),
        ('sal_median', agate.Median('salary')),
    ])

    # Show the summary as a table and as a bar chart of counts.
    print('Model data summary:\n')
    summary_tbl.print_table()
    print()
    summary_tbl.print_bars('pb', 'count', width=40)

    # Add the random column used by the simulation.
    new_table = _add_random_column(tbl)

    # Pivot on 'random_group' and order the result by that same column.
    rand_tbl_count = new_table.pivot('random_group').order_by('random_group')
Пример #10
0
    c = 0
    for item in data_row:
        if isinstance(item, (str, unicode)):
            data_row[c] = item.decode('utf-8', 'replace')
        c += 1

# Build the homicide table via get_table (defined outside this excerpt).
homicide_table = get_table(homicide_data, homicide_types, homicide_titles)

# Keep only rows whose 'GHO (DISPLAY)' value contains the substring 'rates'.
homicide_table = homicide_table.where(lambda x: 'rates' in x['GHO (DISPLAY)'])

# Inner-join africa_ranked to the homicide rows, matching
# 'Countries and areas' against 'COUNTRY (DISPLAY)'.
hom_and_cl = africa_ranked.join(homicide_table,
                                'Countries and areas',
                                'COUNTRY (DISPLAY)',
                                inner=True)

# Median of the joined 'Numeric' column.
print(hom_and_cl.columns['Numeric'].aggregate(agate.Median()))

# List the joined rows ordered by 'Numeric' in reverse.
for r in hom_and_cl.order_by('Numeric', reverse=True).rows:
    print(r['Countries and areas'], r['Numeric'], r['Total (%)'])

# Investigating CPI (Corruption perception index)

pci_workbook = xlrd.open_workbook(DATA_FOLDER +
                                  'perceived_corruption_index.xls')
# Only the first sheet of the workbook is used.
pci_sheet = pci_workbook.sheets()[0]

# Dump every row with its index to inspect the sheet layout.
for r in range(pci_sheet.nrows):
    print(r, pci_sheet.row_values(r))

# Column titles are built by pairing the cells of rows 1 and 2 and joining
# each pair with a space.
pci_title_rows = zip(pci_sheet.row_values(1), pci_sheet.row_values(2))
pci_titles = [t[0] + ' ' + t[1] for t in pci_title_rows]