def download_csv():
    """Build the full expenditure data export and return it as a unicode CSV string.

    Iterates every Expenditure row, flattening the related lobbyist,
    legislator, organization and group records into one wide row per
    expenditure. Legislator and group are optional relations, so their
    columns fall back to None when absent.
    """
    fieldnames = [
        'lobbyist_first_name', 'lobbyist_last_name', 'report_period',
        'recipient_name', 'recipient_type', 'legislator_first_name',
        'legislator_last_name', 'legislator_office', 'legislator_party',
        'legislator_district', 'event_date', 'category', 'description',
        'cost', 'organization_name', 'organization_industry', 'group',
        'ethics_board_id', 'is_solicitation',
    ]
    buf = cStringIO.StringIO()
    writer = UnicodeCSVDictWriter(buf, fieldnames)
    writer.writeheader()

    for exp in Expenditure.select():
        # Legislator is an optional relation; bind it once instead of
        # re-evaluating the conditional lookup for every column.
        leg = exp.legislator
        writer.writerow({
            'lobbyist_first_name': exp.lobbyist.first_name,
            'lobbyist_last_name': exp.lobbyist.last_name,
            'report_period': exp.report_period,
            'recipient_name': exp.recipient,
            'recipient_type': exp.recipient_type,
            'legislator_first_name': leg.first_name if leg else None,
            'legislator_last_name': leg.last_name if leg else None,
            'legislator_office': leg.office if leg else None,
            'legislator_party': leg.party if leg else None,
            'legislator_district': leg.district if leg else None,
            'event_date': exp.event_date,
            'category': exp.category,
            'description': exp.description,
            'cost': exp.cost,
            'organization_name': exp.organization.name,
            'organization_industry': exp.organization.category,
            'group': exp.group.name if exp.group else None,
            'ethics_board_id': exp.ethics_id,
            'is_solicitation': exp.is_solicitation,
        })

    return buf.getvalue().decode('utf-8')
def process_csv(csv_data, template):
    """Normalize scraped rows into the standard salary CSV layout.

    Writes ``<file_name>-ready.csv`` containing one cleaned row per input
    row. Column extraction and cleaning are driven by *template*: each
    standard field maps to source columns plus converter keyword options.
    The output file name comes from ``template['entity_name']`` when
    present, otherwise the user is prompted.
    """
    standard_header = [
        'name',
        'gender',
        'title',
        'department',
        'hire_date',
        'salary',
        'entity',
        'type',
        'received_date',
    ]

    if 'entity_name' in template:
        file_name = template['entity_name'].lower().replace(' ', '_')
    else:
        file_name = raw_input('What should we call this file? (xxx.csv)\n')

    # Each templated field pairs with the converter that cleans its cells;
    # title and department share the same converter.
    converters = [
        ('name', name),
        ('gender', gender),
        ('title', title_department),
        ('department', title_department),
        ('hire_date', hire_date),
        ('salary', salary),
    ]

    with open('{0}-ready.csv'.format(file_name), 'wb') as fo:
        writer = UnicodeCSVDictWriter(fo, standard_header)
        writer.writeheader()

        for row in csv_data['rows']:
            # Entity may be per-row (scraped 'agency' columns) or a single
            # name supplied by the template.
            if 'agency' in template:
                entity_entry = entity(collect_cells(row, template['agency']['columns']))
            else:
                entity_entry = template['entity_name']

            out_row = {
                'entity': entity_entry,
                'type': template['entity_type'],
                'received_date': template['received_date'],
            }
            for field, convert in converters:
                spec = template[field]
                out_row[field] = convert(
                    collect_cells(row, spec['columns']), **spec['options'])
            writer.writerow(out_row)

    sys.stdout.write('File processed.\n')
# Rebuild the local SQLite reports database from scraped committee pages.
# Starts fresh each run by deleting the existing database file.
# NOTE(review): this chunk appears truncated — `rows` is built but the
# `insert` statement is never executed in the visible code; confirm the
# remainder against the full file.
os.remove(DB_NAME)
# Committee detail pages are paginated: %s slots are (committee id, page index).
report_pattern = '/CommitteeDetail.aspx?id=%s&pageindex=%s'
report_scraper = ReportScraper(url_pattern=report_pattern)
# Read from the on-disk HTTP cache as well as writing to it.
report_scraper.cache_storage = scrapelib.cache.FileCache(cache_dir)
report_scraper.cache_write_only = False
conn = sqlite3.connect(DB_NAME)
c = conn.cursor()
for comm_url in comm_urls:
    for report_data in report_scraper.scrape_one(comm_url):
        # Recover the committee id from the URL's query string and attach
        # it to every report row scraped from that committee's pages.
        comm_id = parse_qs(urlparse(comm_url).query)['id'][0]
        report_data['committee_id'] = comm_id
        # Round-trip the single report through CSV so csvkit can infer
        # column types for the SQL table definition.
        outp = StringIO()
        writer = UnicodeCSVDictWriter(outp, fieldnames=report_data.keys())
        writer.writeheader()
        writer.writerow(report_data)
        outp.seek(0)
        t = Table.from_csv(outp, name='reports')
        sql_table = make_table(t)
        # Create the reports table lazily on first use (probe query fails
        # with OperationalError when the table does not exist yet).
        try:
            c.execute('select * from reports limit 1')
        except sqlite3.OperationalError:
            create_st = make_create_table_statement(sql_table)
            c.execute(create_st)
            conn.commit()
        # Skip reports already stored, keyed on the report's own id.
        c.execute('select * from reports where id = ?', (int(report_data['id']),))
        existing = c.fetchall()
        if not existing:
            insert = sql_table.insert()
            headers = t.headers()
            rows = [dict(zip(headers, row)) for row in t.to_rows()]
def download_csv():
    """Generate the expenditure CSV export and return it as a unicode string.

    Every expenditure becomes one row; related lobbyist, organization,
    legislator and group records are flattened into columns. Optional
    relations (legislator, group) yield None-valued columns when missing.
    """
    columns = (
        'lobbyist_first_name', 'lobbyist_last_name', 'report_period',
        'recipient_name', 'recipient_type', 'legislator_first_name',
        'legislator_last_name', 'legislator_office', 'legislator_party',
        'legislator_district', 'event_date', 'category', 'description',
        'cost', 'organization_name', 'organization_industry', 'group',
        'ethics_board_id', 'is_solicitation',
    )
    output = cStringIO.StringIO()
    writer = UnicodeCSVDictWriter(output, list(columns))
    writer.writeheader()

    for item in Expenditure.select():
        record = {
            'lobbyist_first_name': item.lobbyist.first_name,
            'lobbyist_last_name': item.lobbyist.last_name,
            'report_period': item.report_period,
            'recipient_name': item.recipient,
            'recipient_type': item.recipient_type,
            'event_date': item.event_date,
            'category': item.category,
            'description': item.description,
            'cost': item.cost,
            'organization_name': item.organization.name,
            'organization_industry': item.organization.category,
            'ethics_board_id': item.ethics_id,
            'is_solicitation': item.is_solicitation,
        }
        # Legislator is optional; emit None for each of its columns
        # when the expenditure has no associated legislator.
        member = item.legislator
        record.update({
            'legislator_first_name': member.first_name if member else None,
            'legislator_last_name': member.last_name if member else None,
            'legislator_office': member.office if member else None,
            'legislator_party': member.party if member else None,
            'legislator_district': member.district if member else None,
        })
        # Group is likewise optional.
        record['group'] = item.group.name if item.group else None
        writer.writerow(record)

    return output.getvalue().decode('utf-8')
# Rebuild the local SQLite reports database from scraped committee pages,
# starting from a clean slate by removing the previous database file.
# NOTE(review): this chunk appears truncated mid-branch — `insert` and
# `headers` are assigned but never used in the visible code; confirm the
# remainder against the full file.
os.remove(DB_NAME)
# Committee detail pages are paginated: %s slots are (committee id, page index).
report_pattern = '/CommitteeDetail.aspx?id=%s&pageindex=%s'
report_scraper = ReportScraper(url_pattern=report_pattern)
# Use the on-disk HTTP cache for reads too, not just writes.
report_scraper.cache_storage = scrapelib.cache.FileCache(cache_dir)
report_scraper.cache_write_only = False
conn = sqlite3.connect(DB_NAME)
c = conn.cursor()
for comm_url in comm_urls:
    for report_data in report_scraper.scrape_one(comm_url):
        # Extract the committee id from the URL query string and stamp it
        # onto every scraped report row.
        comm_id = parse_qs(urlparse(comm_url).query)['id'][0]
        report_data['committee_id'] = comm_id
        # Serialize the single report to CSV so csvkit can infer column
        # types for building the SQL table definition.
        outp = StringIO()
        writer = UnicodeCSVDictWriter(outp, fieldnames=report_data.keys())
        writer.writeheader()
        writer.writerow(report_data)
        outp.seek(0)
        t = Table.from_csv(outp, name='reports')
        sql_table = make_table(t)
        # Create the reports table on first use: the probe query raises
        # OperationalError when the table does not exist yet.
        try:
            c.execute('select * from reports limit 1')
        except sqlite3.OperationalError:
            create_st = make_create_table_statement(sql_table)
            c.execute(create_st)
            conn.commit()
        # Only insert reports not already present, keyed by report id.
        c.execute('select * from reports where id = ?', (int(report_data['id']), ))
        existing = c.fetchall()
        if not existing:
            insert = sql_table.insert()
            headers = t.headers()