def handle(self, *args, **options):
    """Remove the stored data of every jurisdiction ID given in ``args``.

    The database connection is configured from the ``MONGOHQ_URL``
    environment variable (falling back to a local ``pupa`` database). For
    each known jurisdiction ID, the people referenced by its memberships
    are removed, followed by the memberships and the organizations of that
    jurisdiction. An unknown ID only prints a message. ``options`` is
    accepted but not used.
    """
    mongo_url = os.getenv('MONGOHQ_URL', 'mongodb://localhost:27017/pupa')
    url_parts = urlsplit(mongo_url)
    # The database name is the URL path without its leading slash.
    _configure_db(mongo_url, url_parts.port, url_parts.path[1:])

    for jurisdiction_id in args:
        known = db.jurisdictions.find({'_id': jurisdiction_id}).count()
        if not known:
            print("Couldn't find jurisdiction_id %s" % jurisdiction_id)
            continue

        # People carry no jurisdiction_id in these queries, so they are
        # reached through the memberships before those are removed.
        memberships = db.memberships.find({'jurisdiction_id': jurisdiction_id})
        for membership in memberships:
            db.people.remove({'_id': membership['person_id']})

        db.memberships.remove({'jurisdiction_id': jurisdiction_id})
        db.organizations.remove({'jurisdiction_id': jurisdiction_id})
def handle(self, *args, **options):
    """Export representatives as CSV files and upload them to S3.

    For every ``Report`` with an empty ``exception`` (skipping modules whose
    name ends in ``_candidates`` or ``_municipalities``), the scraper module
    is imported and, for each object in it that has a truthy
    ``jurisdiction_id``, the non-party memberships of that jurisdiction are
    read from MongoDB and uploaded as ``csv/<slug>.csv`` to the
    ``represent.opennorth.ca`` bucket. Finally ``csv/complete.csv`` is
    uploaded with the rows of all jurisdictions, each prefixed with an
    ``Organization`` column. Reports whose module cannot be imported are
    deleted.

    ``args`` and ``options`` are accepted but not used.
    """
    office_keys = ('type', 'postal', 'tel', 'fax')
    genders = {'male': 'M', 'female': 'F'}
    # The first ward number may have several digits too ("Wards 10 & 11").
    multi_ward_re = re.compile(r'\AWards \d+(?:(?:,| & | and )\d+)+\Z')

    def save(key, body):
        """Upload ``body`` to the bucket under ``key`` with a public-read ACL."""
        k = Key(bucket)
        k.key = key
        k.set_contents_from_string(body)
        k.set_acl('public-read')

    def upload_csv(key, headers, rows):
        """Write ``headers`` and ``rows`` as CSV and upload the result under ``key``."""
        stream = StringIO()
        writer = UnicodeWriter(stream, encoding='windows-1252')
        writer.writerow(headers)
        writer.writerows(rows)
        save(key, stream.getvalue())

    def build_rows(jurisdiction_id):
        """Return ``(rows, offices_count)`` for one jurisdiction.

        ``rows`` is sorted; ``offices_count`` is the largest number of
        offices found on any single membership.
        """
        rows = []
        offices_count = 0

        # Exclude party memberships.
        query = {'jurisdiction_id': jurisdiction_id, 'role': {'$nin': ['member', 'candidate']}}
        for membership in db.memberships.find(query):
            person = db.people.find_one({'_id': membership['person_id']})

            party_membership = db.memberships.find_one({'jurisdiction_id': jurisdiction_id, 'role': 'member', 'person_id': membership['person_id']})
            if party_membership:
                party_name = db.organizations.find_one({'_id': party_membership['organization_id']})['name']
            else:
                party_name = None

            if ' ' in person['name']:
                first_name, last_name = person['name'].rsplit(' ', 1)
            else:
                first_name, last_name = None, person['name']

            # @see http://represent.opennorth.ca/api/#fields
            row = [
                person['post_id'],  # District name
                membership['role'],  # Elected office
                person['name'],  # Name
                first_name,  # First name
                last_name,  # Last name
                genders.get(person['gender']),  # Gender
                party_name,  # Party name
                next((contact_detail['value'] for contact_detail in membership['contact_details'] if contact_detail['type'] == 'email'), None),  # Email
                # NOTE(review): the last source is only used when there is
                # more than one; presumably the first is a shared listing
                # page rather than the person's own page. Confirm.
                person['sources'][-1]['url'] if len(person['sources']) > 1 else None,  # URL
                person['image'],  # Photo URL
                get_personal_url(person),  # Personal URL
            ]

            offices = get_offices(membership)
            offices_count = max(offices_count, len(offices))
            for office in offices:
                row.extend(office.get(key) for key in office_keys)

            # If the person is associated to multiple boundaries, emit one
            # row per ward, identical except for the district name.
            if multi_ward_re.search(person['post_id']):
                for district_id in re.findall(r'\d+', person['post_id']):
                    rows.append(['Ward %s' % district_id] + row[1:])
            else:
                rows.append(row)

        rows.sort()
        return rows, offices_count

    sys.path.append(os.path.abspath('scrapers'))

    url = os.getenv('MONGOHQ_URL', 'mongodb://localhost:27017/pupa')
    parsed = urlsplit(url)
    _configure_db(url, parsed.port, parsed.path[1:])

    bucket = S3Connection().get_bucket('represent.opennorth.ca')

    # Fixed file names for these organizations; any other name is slugified.
    names = {
        'Parliament of Canada': 'house-of-commons',
        'Legislative Assembly of Alberta': 'alberta-legislature',
        'Legislative Assembly of British Columbia': 'bc-legislature',
        'Legislative Assembly of Manitoba': 'manitoba-legislature',
        'Legislative Assembly of New Brunswick': 'new-brunswick-legislature',
        'Newfoundland and Labrador House of Assembly': 'newfoundland-labrador-legislature',
        'Nova Scotia House of Assembly': 'nova-scotia-legislature',
        'Legislative Assembly of Ontario': 'ontario-legislature',
        'Legislative Assembly of Prince Edward Island': 'pei-legislature',
        'Assemblée nationale du Québec': 'quebec-assemblee-nationale',
        'Legislative Assembly of Saskatchewan': 'saskatchewan-legislature',
    }

    default_headers = [
        'District name',
        'Elected office',
        'Name',
        'First name',
        'Last name',
        'Gender',
        'Party name',
        'Email',
        'URL',
        'Photo URL',
        'Personal URL',
    ]
    # Repeated once per office column group.
    office_headers = [
        'Office type',
        'Address',
        'Phone',
        'Fax',
    ]

    all_rows = []
    max_offices_count = 0

    reports = Report.objects.filter(exception='').exclude(module__endswith='_candidates').exclude(module__endswith='_municipalities').order_by('module')
    for report in reports:
        # Only a failed import of the scraper module marks the report as
        # stale; an ImportError raised later must not delete it.
        try:
            module = importlib.import_module(report.module)
        except ImportError:
            report.delete()  # delete reports for old modules
            continue

        for obj in module.__dict__.values():
            jurisdiction_id = getattr(obj, 'jurisdiction_id', None)
            if not jurisdiction_id:
                continue

            # We've found the jurisdiction definition in the module.
            name = getattr(obj, 'name', None)
            rows, offices_count = build_rows(jurisdiction_id)

            slug = names[name] if name in names else slugify(name)
            headers = default_headers + office_headers * offices_count
            upload_csv('csv/%s.csv' % slug, headers, rows)

            max_offices_count = max(max_offices_count, offices_count)
            # The combined file identifies each row by its organization.
            all_rows.extend([name] + row for row in rows)

    headers = ['Organization'] + default_headers + office_headers * max_offices_count
    upload_csv('csv/complete.csv', headers, all_rows)