def import_bills(state, data_dir):
    """Load the scraped bill JSON files for ``state`` into ``db.bills``.

    Files are read from ``<data_dir>/<state>/bills/*.json``.  Every sponsor
    and every entry of a vote's yes/no/other lists is annotated with the
    ``leg_id`` returned by ``get_legislator_id``.  A bill with no matching
    document (same state, session, chamber and bill_id) is inserted with
    fresh timestamps; otherwise the stored document is updated.
    """
    vote_fields = ("yes_votes", "no_votes", "other_votes")
    bills_glob = os.path.join(data_dir, state, "bills", "*.json")

    for json_path in glob.iglob(bills_glob):
        with open(json_path) as handle:
            data = prepare_obj(json.load(handle))

        existing = db.bills.find_one(
            {
                "state": data["state"],
                "session": data["session"],
                "chamber": data["chamber"],
                "bill_id": data["bill_id"],
            }
        )
        session = data["session"]

        # Sponsors are matched without a chamber restriction.
        for sponsor in data["sponsors"]:
            sponsor["leg_id"] = get_legislator_id(
                state, session, None, sponsor["name"]
            )

        # Replace each list of voter names with name/leg_id records.
        for vote in data["votes"]:
            for field in vote_fields:
                vote[field] = [
                    {
                        "name": voter,
                        "leg_id": get_legislator_id(
                            state, session, vote["chamber"], voter
                        ),
                    }
                    for voter in vote[field]
                ]

        if existing:
            data["keywords"] = list(keywordize(data["title"]))
            update(existing, data, db.bills)
        else:
            created = datetime.datetime.now()
            data["created_at"] = created
            data["updated_at"] = created
            data["keywords"] = list(keywordize(data["title"]))
            insert_with_id(data)
def import_votes(state, data_dir):
    """Merge standalone vote JSON files into their bills in ``db.bills``.

    Reads ``<data_dir>/<state>/votes/*.json``.  Each vote's ``bill_id`` is
    normalised with ``fix_bill_id`` before the bill lookup; votes whose bill
    cannot be found are logged and skipped.  A vote with the same motion and
    date as one already stored on the bill overwrites it, otherwise the vote
    is appended.  Prints the number of vote files found when finished.
    """
    vote_glob = os.path.join(data_dir, state, "votes", "*.json")
    paths = glob.glob(vote_glob)

    for vote_path in paths:
        with open(vote_path) as handle:
            data = prepare_obj(json.load(handle))

        # clean up bill_id, needs to match the one already in the database
        data["bill_id"] = fix_bill_id(data["bill_id"])

        bill = db.bills.find_one(
            {
                "state": state,
                "chamber": data["bill_chamber"],
                "session": data["session"],
                "bill_id": data["bill_id"],
            }
        )
        if not bill:
            _log.warning("Couldn't find bill %s" % data["bill_id"])
            continue

        # These keys are not stored on the embedded vote record.
        del data["bill_id"]
        data.pop("filename", None)

        for field in ("yes_votes", "no_votes", "other_votes"):
            data[field] = [
                {
                    "name": voter,
                    "leg_id": get_legislator_id(
                        state, data["session"], data["chamber"], voter
                    ),
                }
                for voter in data[field]
            ]

        # First stored vote with the same motion and date, if any.
        duplicate = next(
            (
                known
                for known in bill["votes"]
                if known["motion"] == data["motion"]
                and known["date"] == data["date"]
            ),
            None,
        )
        if duplicate is None:
            bill["votes"].append(data)
        else:
            duplicate.update(data)

        db.bills.save(bill, safe=True)

    print("imported %s vote files" % len(paths))
def test_get_legislator_id():
    """Check name matching of ``names.get_legislator_id`` on one legislator.

    Inserts metadata for state ``ex`` (term T1 / session S1) and a single
    upper-chamber legislator, then verifies that several spellings of the
    name resolve to that legislator and that a lower-chamber lookup does not.
    """
    metadata = {"_id": "ex", "terms": [{"name": "T1", "sessions": ["S1"]}]}
    legislator = {
        "_id": "EXL000042",
        "state": "ex",
        "full_name": "Ed Iron Cloud III",
        "_scraped_name": "Ed Iron Cloud III",
        "first_name": "Ed",
        "last_name": "Iron Cloud",
        "suffixes": "III",
        "roles": [
            {
                "type": "member",
                "state": "ex",
                "term": "T1",
                "chamber": "upper",
                "district": "10",
            }
        ],
    }
    db.metadata.insert(metadata)
    db.legislators.insert(legislator)

    # Full name, bare last name and initial + last name must all match.
    for spelling in ("Ed Iron Cloud", "Iron Cloud", "E. Iron Cloud"):
        assert names.get_legislator_id("ex", "S1", "upper", spelling) == "EXL000042"

    # The legislator only holds an upper-chamber role.
    assert not names.get_legislator_id("ex", "S1", "lower", "Ed Iron Cloud")
def import_votes(state, data_dir):
    """Attach the vote files under ``<data_dir>/<state>/votes`` to bills.

    For every ``*.json`` vote file the owning bill is looked up by state,
    bill chamber, session and bill_id; a warning is logged and the file is
    skipped when no bill matches.  Voter names are expanded to name/leg_id
    records, and the vote either replaces the stored vote with the same
    motion and date or is appended to the bill's vote list.
    """
    state_dir = os.path.join(data_dir, state)

    for vote_file in glob.iglob(os.path.join(state_dir, "votes", "*.json")):
        with open(vote_file) as fh:
            data = prepare_obj(json.load(fh))

        bill_spec = {
            "state": state,
            "chamber": data["bill_chamber"],
            "session": data["session"],
            "bill_id": data["bill_id"],
        }
        bill = db.bills.find_one(bill_spec)
        if not bill:
            _log.warning("Couldn't find bill %s" % data["bill_id"])
            continue

        del data["bill_id"]
        # 'filename' is optional in the vote file.
        if "filename" in data:
            del data["filename"]

        for bucket in ("yes_votes", "no_votes", "other_votes"):
            resolved = []
            for voter_name in data[bucket]:
                leg_id = get_legislator_id(
                    state, data["session"], data["chamber"], voter_name
                )
                resolved.append({"name": voter_name, "leg_id": leg_id})
            data[bucket] = resolved

        stored_votes = bill["votes"]
        for stored in stored_votes:
            same_motion = stored["motion"] == data["motion"]
            if same_motion and stored["date"] == data["date"]:
                # Same roll call seen before: refresh it in place.
                stored.update(data)
                break
        else:
            stored_votes.append(data)

        db.bills.save(bill, safe=True)
def import_votes(state, data_dir):
    """Merge vote JSON files for ``state`` into the matching bills.

    Reads every ``<data_dir>/<state>/votes/*.json`` file and looks up the
    bill it belongs to (by state, bill chamber, session and bill_id).  Votes
    with no matching bill are logged with a warning and skipped.

    The ``bill_id`` and (if present) ``filename`` keys are dropped from the
    vote before it is embedded.  Each name in the yes/no/other lists is
    replaced by a ``{'name': ..., 'leg_id': ...}`` record using
    ``get_legislator_id``.  A stored vote with the same motion and date is
    updated in place; otherwise the vote is appended.  The bill is saved
    after each file.
    """
    data_dir = os.path.join(data_dir, state)
    pattern = os.path.join(data_dir, 'votes', '*.json')

    for path in glob.iglob(pattern):
        with open(path) as f:
            data = prepare_obj(json.load(f))

        bill = db.bills.find_one({'state': state,
                                  'chamber': data['bill_chamber'],
                                  'session': data['session'],
                                  'bill_id': data['bill_id']})

        if not bill:
            _log.warning("Couldn't find bill %s" % data['bill_id'])
            continue

        del data['bill_id']
        # 'filename' is not guaranteed to be in the vote file; a missing key
        # must not abort the whole import.
        data.pop('filename', None)

        for vtype in ('yes_votes', 'no_votes', 'other_votes'):
            data[vtype] = [
                {'name': svote,
                 'leg_id': get_legislator_id(state, data['session'],
                                             data['chamber'], svote)}
                for svote in data[vtype]
            ]

        for vote in bill['votes']:
            if (vote['motion'] == data['motion']
                    and vote['date'] == data['date']):
                vote.update(data)
                break
        else:
            # No stored vote shares this motion and date: it is new.
            bill['votes'].append(data)

        db.bills.save(bill, safe=True)
def _committees_from_legislator_roles(state, current_term):
    """Create or extend committees from legislators' committee-member roles.

    Considers legislators having a role in ``current_term`` for ``state``
    and processes each of their 'committee member' roles that has no
    ``committee_id`` yet.
    """
    query = {"roles": {"$elemMatch": {"term": current_term, "state": state}}}

    for legislator in db.legislators.find(query):
        for role in legislator["roles"]:
            if role["type"] != "committee member" or "committee_id" in role:
                continue

            spec = {
                "state": role["state"],
                "chamber": role["chamber"],
                "committee": role["committee"],
            }
            if "subcommittee" in role:
                spec["subcommittee"] = role["subcommittee"]

            committee = db.committees.find_one(spec)
            if not committee:
                # No such committee yet: the lookup spec becomes the document.
                committee = spec
                committee["_type"] = "committee"
                committee["members"] = []
                committee["sources"] = []
                insert_with_id(committee)

            already_member = any(
                member["leg_id"] == legislator["leg_id"]
                for member in committee["members"]
            )
            if not already_member:
                committee["members"].append(
                    {
                        "name": legislator["full_name"],
                        "leg_id": legislator["leg_id"],
                        "role": "member",
                    }
                )
                db.committees.save(committee, safe=True)

            # Link the role to its committee whether or not the legislator
            # was already listed as a member.
            role["committee_id"] = committee["_id"]

        db.legislators.save(legislator, safe=True)


def _ensure_committee_role(legislator, committee, current_term, state):
    """Give ``legislator`` a current-term role for ``committee`` if missing."""
    for role in legislator["roles"]:
        # .get(): committee-member roles that were never linked to a
        # committee document carry no 'committee_id' key.
        if (
            role["type"] == "committee member"
            and role["term"] == current_term
            and role.get("committee_id") == committee["_id"]
        ):
            return

    new_role = {
        "type": "committee member",
        "committee": committee["committee"],
        "term": current_term,
        "chamber": committee["chamber"],
        "committee_id": committee["_id"],
        "state": state,
    }
    if "subcommittee" in committee:
        new_role["subcommittee"] = committee["subcommittee"]

    legislator["roles"].append(new_role)
    legislator["updated_at"] = datetime.datetime.utcnow()
    db.legislators.save(legislator, safe=True)


def import_committees(state, data_dir):
    """Import committee data for ``state`` into ``db.committees``.

    The current term and session are the last term in the state's metadata
    and the last session of that term.

    If ``<data_dir>/<state>/committees/`` holds no ``*.json`` files,
    committees are derived from the committee-member roles already stored on
    legislators.  Otherwise each file is inserted, or merged into the
    existing committee with the same state, chamber, committee and (when
    given) subcommittee.  Members with a name are resolved through
    ``get_legislator_id``; unmatched names are printed and stored with
    ``leg_id`` None, matched legislators get a committee-member role for the
    current term if they lack one.

    Finally prints the number of files found and calls ``link_parents`` and
    ``ensure_indexes``.
    """
    data_dir = os.path.join(data_dir, state)
    pattern = os.path.join(data_dir, "committees", "*.json")

    meta = db.metadata.find_one({"_id": state})
    latest_term = meta["terms"][-1]
    current_term = latest_term["name"]
    current_session = latest_term["sessions"][-1]

    paths = glob.glob(pattern)

    if not paths:
        # Not standalone committees
        _committees_from_legislator_roles(state, current_term)

    for path in paths:
        with open(path) as f:
            data = prepare_obj(json.load(f))

        spec = {
            "state": state,
            "chamber": data["chamber"],
            "committee": data["committee"],
        }
        if "subcommittee" in data:
            spec["subcommittee"] = data["subcommittee"]

        committee = db.committees.find_one(spec)
        if not committee:
            insert_with_id(data)
            committee = data
        else:
            update(committee, data, db.committees)

        for member in committee["members"]:
            if not member["name"]:
                continue

            leg_id = get_legislator_id(
                state, current_session, data["chamber"], member["name"]
            )
            if not leg_id:
                print("No matches for %s" % member["name"].encode("ascii", "ignore"))
                member["leg_id"] = None
                continue

            legislator = db.legislators.find_one({"_id": leg_id})
            member["leg_id"] = leg_id
            _ensure_committee_role(legislator, committee, current_term, state)

        # Persist the leg_id annotations made on the member list.
        db.committees.save(committee, safe=True)

    print("imported %s committee files" % len(paths))

    link_parents(state)

    ensure_indexes()
def import_bills(state, data_dir):
    """Import ``<data_dir>/<state>/bills/*.json`` into ``db.bills``.

    For each bill file: sponsors and roll-call voters get a ``leg_id`` from
    ``get_legislator_id``, ``_term`` is set from the state's metadata
    (session -> term name), titles found on versions are merged into
    ``alternate_titles`` (minus the primary title), and ``_keywords`` is
    computed with ``bill_keywords``.  New bills are inserted with
    timestamps, known bills are updated.  After all files are processed,
    ``populate_current_fields`` and ``ensure_indexes`` are called.
    """
    bills_glob = os.path.join(data_dir, state, "bills", "*.json")
    meta = db.metadata.find_one({"_id": state})

    # Build a session to term mapping
    term_by_session = dict(
        (session, term["name"])
        for term in meta["terms"]
        for session in term["sessions"]
    )

    for json_path in glob.iglob(bills_glob):
        with open(json_path) as handle:
            data = prepare_obj(json.load(handle))

        existing = db.bills.find_one(
            {
                "state": data["state"],
                "session": data["session"],
                "chamber": data["chamber"],
                "bill_id": data["bill_id"],
            }
        )

        for sponsor in data["sponsors"]:
            sponsor["leg_id"] = get_legislator_id(
                state, data["session"], None, sponsor["name"]
            )

        for vote in data["votes"]:
            for field in ("yes_votes", "no_votes", "other_votes"):
                vote[field] = [
                    {
                        "name": voter,
                        "leg_id": get_legislator_id(
                            state, data["session"], vote["chamber"], voter
                        ),
                    }
                    for voter in vote[field]
                ]

        data["_term"] = term_by_session[data["session"]]

        # Merge any version titles into the alternate_titles list
        titles = set(data["alternate_titles"])
        for version in data["versions"]:
            for key in ("title", "+short_title"):
                if key in version:
                    titles.add(version[key])
        # Make sure the primary title isn't included in the
        # alternate title list
        titles.discard(data["title"])
        data["alternate_titles"] = list(titles)

        if existing:
            data["_keywords"] = list(bill_keywords(data))
            update(existing, data, db.bills)
        else:
            created = datetime.datetime.now()
            data["created_at"] = created
            data["updated_at"] = created
            data["_keywords"] = list(bill_keywords(data))
            insert_with_id(data)

    populate_current_fields(state)
    ensure_indexes()