def load_new_files(state):
    """Return the set of ``legacy_openstates`` identifiers found for ``state``.

    Walks every object yielded by ``iter_objects`` for both the "people" and
    "retired" collections of ``state`` and gathers the ``identifier`` of each
    ``other_identifiers`` entry whose ``scheme`` is ``legacy_openstates``.
    Objects without an ``other_identifiers`` key contribute nothing.  The
    second element of each yielded pair is ignored.
    """
    everyone = itertools.chain(
        iter_objects(state, "people"),
        iter_objects(state, "retired"),
    )
    return {
        entry["identifier"]
        for record, _ in everyone
        for entry in record.get("other_identifiers", [])
        if entry["scheme"] == "legacy_openstates"
    }
def generate_template_csv(abbreviations, filename, missing_id=None):
    """Write a CSV template with one row per person to ``filename``.

    NOTE: this module defines ``generate_template_csv`` again right after
    this definition; the later definition is the one bound to the name once
    the module has been imported.

    For every abbreviation in ``abbreviations`` the "people" objects are
    read via ``iter_objects`` and a row with the columns ``id``, ``name``,
    ``chamber``, ``district`` and ``jurisdiction`` is written, the last
    three taken from the person's first active role (per
    ``role_is_active``).

    Args:
        abbreviations: iterable of abbreviations passed to ``iter_objects``.
        filename: path of the CSV file to create (overwritten if present).
        missing_id: optional identifier scheme; when given, people that
            already carry an ``other_identifiers`` entry with this scheme
            are left out of the file.

    Raises:
        ValueError: if a person that should be written has no active role.
    """
    fields = ('id', 'name', 'chamber', 'district', 'jurisdiction')

    # The csv module requires newline='' on files handed to a writer,
    # otherwise line endings can be translated a second time by the platform.
    with open(filename, 'w', newline='') as outfile:
        out = csv.DictWriter(outfile, fields)
        out.writeheader()

        for abbr in abbreviations:
            # The per-object path is not needed here; binding it to `_`
            # avoids shadowing the `filename` parameter.
            for person, _ in iter_objects(abbr, 'people'):
                if missing_id and any(
                    oid['scheme'] == missing_id
                    for oid in person.get('other_identifiers', [])
                ):
                    # already has the identifier we are collecting
                    continue

                # first active role wins
                role = next(
                    (r for r in person['roles'] if role_is_active(r)), None
                )
                if role is None:
                    raise ValueError(
                        f"no active role found for person {person.get('id')}"
                    )

                out.writerow({
                    'id': person['id'],
                    'name': person['name'],
                    'chamber': role['type'],
                    'district': role['district'],
                    'jurisdiction': role['jurisdiction'],
                })
def generate_template_csv(abbreviations, filename, missing_id=None):
    """Write a CSV template with one row per person to ``filename``.

    For every abbreviation in ``abbreviations`` the "people" objects are
    read via ``iter_objects`` and a row with the columns ``id``, ``name``,
    ``chamber``, ``district`` and ``jurisdiction`` is written, the last
    three taken from the person's first active role (per
    ``role_is_active``).

    Args:
        abbreviations: iterable of abbreviations passed to ``iter_objects``.
        filename: path of the CSV file to create (overwritten if present).
        missing_id: optional identifier scheme; when given, people that
            already carry an ``other_identifiers`` entry with this scheme
            are left out of the file.

    Raises:
        ValueError: if a person that should be written has no active role.
    """
    fields = ("id", "name", "chamber", "district", "jurisdiction")

    # The csv module requires newline="" on files handed to a writer,
    # otherwise line endings can be translated a second time by the platform.
    with open(filename, "w", newline="") as outfile:
        out = csv.DictWriter(outfile, fields)
        out.writeheader()

        for abbr in abbreviations:
            # The per-object path is not needed here; binding it to `_`
            # avoids shadowing the `filename` parameter.
            for person, _ in iter_objects(abbr, "people"):
                if missing_id and any(
                    oid["scheme"] == missing_id
                    for oid in person.get("other_identifiers", [])
                ):
                    # already has the identifier we are collecting
                    continue

                # first active role wins
                role = next(
                    (r for r in person["roles"] if role_is_active(r)), None
                )
                if role is None:
                    raise ValueError(
                        f"no active role found for person {person.get('id')}"
                    )

                out.writerow({
                    "id": person["id"],
                    "name": person["name"],
                    "chamber": role["type"],
                    "district": role["district"],
                    "jurisdiction": role["jurisdiction"],
                })
def download_state_images(abbr, skip_existing):
    """Upload the original and a resized copy of each person's image.

    Iterates the "legislature" objects of ``abbr``.  For every object with a
    non-empty ``image`` URL, the downloaded image is passed to ``upload``
    under ``images/original/<id>``; when that call returns something truthy,
    a copy produced by ``resize_image(..., 200)`` is uploaded under
    ``images/small/<id>``.  ``skip_existing`` is forwarded to ``upload``
    unchanged.
    """
    for record, _ in iter_objects(abbr, "legislature"):
        image_url = record.get("image")
        pid = record["id"]

        if image_url:
            original = upload(
                lambda: download_image(image_url),
                f"images/original/{pid}",
                skip_existing,
            )

            # A skipped original leaves nothing to resize, so newly added
            # profiles only get a small image when this is run with
            # --no-skip-existing.
            if original:
                # small variant: largest dimension resized to 200px
                upload(
                    lambda: resize_image(original, 200),
                    f"images/small/{pid}",
                    skip_existing,
                )
def dir_to_mongo(abbr, create, clear_old_roles, verbose):
    """Sync the "people" objects of ``abbr`` into the ``legislators`` collection.

    Connects to the ``fiftystates`` database on ``BILLY_MONGO_HOST``
    (default ``localhost``), builds one legacy-style document per person and
    saves it when it differs from the stored one.  Afterwards every stored
    legislator of ``abbr`` whose ``_id`` was not seen is handed to
    ``retire_person``.

    Args:
        abbr: abbreviation used for ``iter_objects``, the metadata lookup and
            the ``state`` field of the written documents.
        create: when true, people without a ``legacy_openstates`` identifier
            get one from ``get_next_id`` and are written back with
            ``dump_obj``; when false such a person aborts the run.
        clear_old_roles: when true, ``old_roles`` of an existing document is
            not carried over (an empty dict is stored instead).
        verbose: when true, also report people that needed no update.

    Exits with status 1 (``sys.exit``) if a person has no legacy ID and
    ``create`` is false, or if a person has no active role or no active
    party.
    """
    db = pymongo.MongoClient(os.environ.get('BILLY_MONGO_HOST', 'localhost'))['fiftystates']

    metadata = db.metadata.find({'_id': abbr})[0]
    latest_term = metadata['terms'][-1]['name']

    active_ids = []

    for person, filename in iter_objects(abbr, 'people'):

        legacy_ids = [
            oid['identifier'] for oid in person.get('other_identifiers', [])
            if oid['scheme'] == 'legacy_openstates'
        ]
        if not legacy_ids:
            if create:
                # get next ID
                new_id = get_next_id(db, abbr)
                legacy_ids = [new_id]
                if 'other_identifiers' not in person:
                    person['other_identifiers'] = []
                person['other_identifiers'].append({
                    'scheme': 'legacy_openstates',
                    'identifier': new_id
                })
                dump_obj(person, filename=filename)
            else:
                click.secho(
                    f'{filename} does not have legacy ID, run with --create',
                    fg='red')
                sys.exit(1)

        active_ids.append(legacy_ids[0])

        # handle name (the prefix is not used in the legacy document)
        _prefix, first_name, last_name, suffixes = name_tools.split(
            person['name'])

        # get chamber, district, party
        #
        # These are resolved freshly for every person and a missing value is
        # a hard error.  Previously they were only assigned inside the
        # `if role_is_active(...)` branches, so a person without an active
        # role/party either crashed with UnboundLocalError (first person) or
        # silently inherited the values of the previous person.
        active_role = next(
            (role for role in person['roles'] if role_is_active(role)), None)
        if active_role is None:
            click.secho(f'{filename} does not have an active role', fg='red')
            sys.exit(1)
        chamber = active_role['type']
        district = active_role['district']

        active_parties = [
            role['name'] for role in person['party'] if role_is_active(role)
        ]
        if not active_parties:
            click.secho(f'{filename} does not have an active party', fg='red')
            sys.exit(1)
        # the original loop did not stop at the first match: last one wins
        party = active_parties[-1]

        # NOTE(review): assumes every person has at least one link - confirm
        url = person['links'][0]['url']
        email = ''

        offices = []
        for cd in person.get('contact_details', []):
            office = {
                'fax': cd.get('fax'),
                'phone': cd.get('voice'),
                'address': cd.get('address'),
                'email': cd.get('email'),
                'name': cd['note'],
                'type': 'capitol' if 'capitol' in cd['note'].lower() else 'district'
            }
            offices.append(office)
            # the first office that has an email supplies the person's email
            if office['email'] and not email:
                email = office['email']

        # NE & DC
        if chamber == 'legislature':
            chamber = 'upper'

        # get some old data to keep around
        created_at = datetime.datetime.utcnow()
        old_roles = {}
        old_person = None
        try:
            old_person = db.legislators.find({'_id': legacy_ids[0]})[0]
            created_at = old_person['created_at']
            if not clear_old_roles:
                old_roles = old_person.get('old_roles', {})
        except IndexError:
            # no stored document for this ID yet: keep the defaults above
            pass

        mongo_person = {
            '_id': legacy_ids[0],
            'leg_id': legacy_ids[0],
            '_all_ids': legacy_ids,
            '_type': 'person',
            'active': True,
            'full_name': person['name'],
            '_scraped_name': person['name'],
            'photo_url': person.get('image'),
            'state': abbr,
            'district': district,
            'chamber': chamber,
            'party': party,
            'email': email,
            'url': url,
            'offices': offices,
            'created_at': created_at,

            'first_name': first_name,
            'middle_name': '',
            'last_name': last_name,
            'suffixes': suffixes,
            'sources': person['sources'],

            'old_roles': old_roles,
            'roles': [
                {
                    'term': latest_term,
                    'district': district,
                    'chamber': chamber,
                    'state': abbr,
                    'party': party,
                    'type': 'member',
                    'start_date': None,
                    'end_date': None
                },
            ],
        }
        # TODO: committee info
        # { "term" : "2017-2018", "committee_id" : "NCC000233", "chamber" : "lower",
        # "state" : "nc", "subcommittee" : null, "committee" : "State and Local Government II",
        # "position" : "member", "type" : "committee member" },

        # compare (updated_at is excluded so it cannot force a rewrite)
        if old_person:
            old_person.pop('updated_at', None)
        if old_person == mongo_person:
            if verbose:
                click.secho(f'no updates to {mongo_person["_id"]}')
        else:
            # print(mongo_person, old_person)
            # raise Exception()
            click.secho(f'updating {mongo_person["_id"]}', fg='green')
            mongo_person['updated_at'] = datetime.datetime.utcnow()
            try:
                # NOTE(review): Collection.save() only exists in PyMongo < 4
                db.legislators.save(mongo_person)
            except Exception as e:
                # best effort: report the failure and move on to the next person
                print(e)
                continue

    to_retire = db.legislators.find({
        '_id': {
            '$nin': active_ids
        },
        'state': abbr
    })
    # NOTE(review): Cursor.count() only exists in PyMongo < 4
    click.secho(f'going to try to retire {to_retire.count()}')
    for leg in to_retire:
        retire_person(db, leg)