def import_bills(state, data_dir):
    """Load every scraped bill file for ``state`` into ``db.bills``.

    Files are read from ``<data_dir>/<state>/bills/*.json``.  Each sponsor
    and each recorded vote is tagged with the ``leg_id`` returned by
    ``get_legislator_id``.  A bill with no stored document matching its
    state/session/chamber/bill_id is inserted; otherwise the stored
    document is merged with the new data through ``update``.
    """
    bills_glob = os.path.join(os.path.join(data_dir, state), "bills", "*.json")

    for filename in glob.iglob(bills_glob):
        with open(filename) as fh:
            data = prepare_obj(json.load(fh))

        existing = db.bills.find_one(
            {
                "state": data["state"],
                "session": data["session"],
                "chamber": data["chamber"],
                "bill_id": data["bill_id"],
            }
        )

        # Sponsors are matched without a chamber hint.
        for sponsor in data["sponsors"]:
            sponsor["leg_id"] = get_legislator_id(
                state, data["session"], None, sponsor["name"]
            )

        # Replace each plain list of voter names with name/leg_id records.
        for vote in data["votes"]:
            for bucket in ("yes_votes", "no_votes", "other_votes"):
                resolved = []
                for voter in vote[bucket]:
                    leg_id = get_legislator_id(
                        state, data["session"], vote["chamber"], voter
                    )
                    resolved.append({"name": voter, "leg_id": leg_id})
                vote[bucket] = resolved

        if existing:
            data["keywords"] = list(keywordize(data["title"]))
            update(existing, data, db.bills)
            continue

        # New bill: both timestamps start out as the same value.
        data["created_at"] = datetime.datetime.now()
        data["updated_at"] = data["created_at"]
        data["keywords"] = list(keywordize(data["title"]))
        insert_with_id(data)
def import_legislator(data):
    """Insert or update one scraped legislator in ``db.legislators``.

    The legislator is looked up by state, full name and a role matching the
    first entry of ``data['roles']``.  If nothing matches, the same lookup is
    retried against the term that precedes the role's term in the state's
    metadata; a hit there is updated in place.  Otherwise a new document is
    inserted.

    Exits the process with status 1 if a ``ValueError`` is raised while
    resolving the previous term (e.g. the role's term is not listed in the
    state metadata).
    """
    # Rename 'role' -> 'type'
    for role in data['roles']:
        if 'role' in role:
            role['type'] = role['role']
            del role['role']

    cur_role = data['roles'][0]

    spec = {'state': data['state'],
            'term': cur_role['term'],
            'type': cur_role['type']}
    # District and chamber only take part in the match when the role has them.
    if 'district' in cur_role:
        spec['district'] = cur_role['district']
    if 'chamber' in cur_role:
        spec['chamber'] = cur_role['chamber']

    leg = db.legislators.find_one(
        {'state': data['state'],
         'full_name': data['full_name'],
         'roles': {'$elemMatch': spec}})

    if not leg:
        metadata = db.metadata.find_one({'_id': data['state']})
        term_names = [t['name'] for t in metadata['terms']]

        try:
            index = term_names.index(cur_role['term'])

            if index > 0:
                # Look for the same person holding the same role one term back.
                prev_term = term_names[index - 1]
                spec['term'] = prev_term
                prev_leg = db.legislators.find_one(
                    {'full_name': data['full_name'],
                     'roles': {'$elemMatch': spec}})

                if prev_leg:
                    update(prev_leg, data, db.legislators)
                    return
        except ValueError:
            print("Invalid term: %s" % cur_role['term'])
            sys.exit(1)

        # Use a single timestamp so a fresh record always starts with
        # created_at == updated_at (two separate now() calls can differ).
        now = datetime.datetime.now()
        data['created_at'] = now
        data['updated_at'] = now

        insert_with_id(data)
    else:
        update(leg, data, db.legislators)

    ensure_indexes()
def import_legislator(data):
    """Insert or update one scraped legislator in ``db.legislators``.

    The legislator is looked up by state, full name and a role matching the
    session, chamber, type and district of the first entry of
    ``data['roles']``.  If nothing matches, the lookup is retried with the
    session that precedes it in the state's metadata; a hit there is updated
    in place.  Otherwise a new document is inserted.

    Exits the process with status 1 if a ``ValueError`` is raised while
    resolving the previous session (e.g. the role's session is not listed in
    the state metadata).
    """
    # Rename 'role' -> 'type'
    for role in data['roles']:
        if 'role' in role:
            role['type'] = role['role']
            del role['role']

    cur_role = data['roles'][0]

    leg = db.legislators.find_one(
        {'state': data['state'],
         'full_name': data['full_name'],
         'roles': {'$elemMatch': {
             'session': cur_role['session'],
             'chamber': cur_role['chamber'],
             'type': cur_role['type'],
             'district': cur_role['district']}}})

    if not leg:
        metadata = db.metadata.find_one({'_id': data['state']})
        session_names = [s['name'] for s in metadata['sessions']]

        try:
            index = session_names.index(cur_role['session'])

            if index > 0:
                # Same person, same seat, one session earlier.
                prev_session = session_names[index - 1]
                prev_leg = db.legislators.find_one(
                    {'state': data['state'],
                     'full_name': data['full_name'],
                     'roles': {'$elemMatch': {
                         'session': prev_session,
                         'chamber': cur_role['chamber'],
                         'type': cur_role['type'],
                         'district': cur_role['district']}}})

                if prev_leg:
                    update(prev_leg, data, db.legislators)
                    return
        except ValueError:
            print("Invalid session: %s" % cur_role['session'])
            sys.exit(1)

        # Use a single timestamp so a fresh record always starts with
        # created_at == updated_at (two separate now() calls can differ).
        now = datetime.datetime.now()
        data['created_at'] = now
        data['updated_at'] = now

        insert_with_id(data)
    else:
        update(leg, data, db.legislators)
def test_update():
    """Check ``utils.update`` timestamp handling and locked-field protection."""
    stamp = datetime.datetime.utcnow()
    seed = {
        '_type': 'bill',
        'state': 'ex',
        'field1': 'stuff',
        'field2': 'original',
        '_locked_fields': 'field2',
        'created_at': stamp,
        'updated_at': stamp,
    }

    bill_id = utils.insert_with_id(seed)
    stored = db.bills.find_one(bill_id)

    # Updating a bill with itself shouldn't cause 'updated_at' to be changed
    utils.update(stored, stored, db.bills)
    reloaded = db.bills.find_one({'_id': bill_id})
    assert reloaded['created_at'] == reloaded['updated_at']
    assert stored['updated_at'] == reloaded['updated_at']

    # A real change must move 'updated_at' forward and apply unlocked fields.
    changes = {
        '_type': 'bill',
        'field1': 'more stuff',
        'field2': 'a change',
        'state': 'ex',
    }
    utils.update(stored, changes, db.bills)
    reloaded = db.bills.find_one({'_id': bill_id})
    assert reloaded['created_at'] != reloaded['updated_at']
    assert stored['updated_at'] != reloaded['updated_at']
    assert reloaded['field1'] == 'more stuff'

    # make sure locked fields don't get overwritten
    assert reloaded['field2'] == 'original'
def import_legislator(data):
    """Insert or update one scraped legislator in ``db.legislators``.

    The legislator is looked up by state, full name and a role matching the
    first entry of ``data["roles"]``.  If nothing matches, the same lookup is
    retried against the term that precedes the role's term in the state's
    metadata; a hit there is updated in place.  Otherwise a new document is
    inserted with UTC creation/update timestamps.

    Exits the process with status 1 if a ``ValueError`` is raised while
    resolving the previous term (e.g. the role's term is not listed in the
    state metadata).
    """
    # Rename 'role' -> 'type'
    for role in data["roles"]:
        if "role" in role:
            role["type"] = role["role"]
            del role["role"]

    cur_role = data["roles"][0]

    spec = {"state": data["state"], "term": cur_role["term"], "type": cur_role["type"]}
    # District and chamber only take part in the match when the role has them.
    if "district" in cur_role:
        spec["district"] = cur_role["district"]
    if "chamber" in cur_role:
        spec["chamber"] = cur_role["chamber"]

    leg = db.legislators.find_one(
        {"state": data["state"], "full_name": data["full_name"], "roles": {"$elemMatch": spec}}
    )

    if not leg:
        metadata = db.metadata.find_one({"_id": data["state"]})
        term_names = [t["name"] for t in metadata["terms"]]

        try:
            index = term_names.index(cur_role["term"])

            if index > 0:
                # Look for the same person holding the same role one term back.
                prev_term = term_names[index - 1]
                spec["term"] = prev_term
                prev_leg = db.legislators.find_one(
                    {"full_name": data["full_name"], "roles": {"$elemMatch": spec}}
                )

                if prev_leg:
                    update(prev_leg, data, db.legislators)
                    return
        except ValueError:
            print("Invalid term: %s" % cur_role["term"])
            sys.exit(1)

        # Use a single timestamp so a fresh record always starts with
        # created_at == updated_at (two separate utcnow() calls can differ).
        now = datetime.datetime.utcnow()
        data["created_at"] = now
        data["updated_at"] = now

        insert_with_id(data)
    else:
        update(leg, data, db.legislators)

    ensure_indexes()
def test_insert_with_id():
    """Inserted people get distinct ``EXL`` + six-digit ids and are stored."""
    id_pattern = r'^EXL\d{6,6}$'
    first_person = {
        'full_name': 'a test legislator',
        '_type': 'person',
        'state': 'ex',
    }
    second_person = {
        'full_name': 'another legislator',
        '_type': 'person',
        'state': 'ex',
    }

    first_id = utils.insert_with_id(first_person)
    assert re.match(id_pattern, first_id)
    assert db.legislators.find_one({'_id': first_id})

    second_id = utils.insert_with_id(second_person)
    assert re.match(id_pattern, second_id)
    # Every insert must allocate a fresh id.
    assert second_id != first_id
    assert db.legislators.find_one({'_id': second_id})
def import_committees(state):
    """Copy a state's committees from the Vote Smart API into ``db.committees``.

    ``state`` is a mapping from which ``_id`` and ``lower_chamber_name`` are
    read.  Committees are fetched per chamber type, matched to stored
    documents by state and ``votesmart_id``, and either inserted or saved
    back with refreshed fields.  ``import_committee_ids`` runs afterwards.
    """
    for second_key in ('chamber', 'votesmart_id'):
        db.committees.ensure_index([('state', pymongo.ASCENDING),
                                    (second_key, pymongo.ASCENDING)])

    # The upper chamber always uses type 'S'; the lower chamber uses 'H'
    # only when its name starts with 'House'.
    lower_type = 'H' if state['lower_chamber_name'].startswith('House') else 'S'
    types = {'upper': 'S', 'lower': lower_type}

    for chamber, type_id in types.items():
        fetched = votesmart.committee.getCommitteesByTypeState(
            typeId=type_id, stateId=state['_id'].upper())

        for committee in fetched:
            # A parent id of "-1" is stored as None.
            raw_parent = committee.parentId
            parent_id = None if raw_parent == "-1" else raw_parent

            existing = db.committees.find_one({
                'state': state['_id'],
                'votesmart_id': committee.committeeId})

            fields = {'state': state['_id'],
                      'votesmart_id': committee.committeeId,
                      'chamber': chamber,
                      'name': committee.name,
                      'parent_votesmart_id': parent_id,
                      '_type': 'committee'}

            if existing:
                existing.update(fields)
                db.committees.save(existing)
            else:
                insert_with_id(fields)

    import_committee_ids(state)
def import_legislator(data):
    """Insert or update one scraped legislator, tracking roles across terms.

    The record is matched on state, ``_scraped_name`` and a role of the same
    type (plus district/chamber when present) in the current, previous or
    next term.  On a match the stored roles are archived under
    ``old_roles`` as needed before ``update`` is called; with no match the
    record is inserted.
    """
    data = prepare_obj(data)
    data['_scraped_name'] = data['full_name']

    # Rename 'role' -> 'type'
    for role in data['roles']:
        if 'role' in role:
            role['type'] = role.pop('role')

    current = data['roles'][0]
    term = current['term']
    prev_term = get_previous_term(data['state'], term)
    next_term = get_next_term(data['state'], term)

    role_spec = {
        'state': data['state'],
        'type': current['type'],
        'term': {'$in': [term, prev_term, next_term]},
    }
    for optional_key in ('district', 'chamber'):
        if optional_key in current:
            role_spec[optional_key] = current[optional_key]

    leg = db.legislators.find_one({
        'state': data['state'],
        '_scraped_name': data['full_name'],
        'roles': {'$elemMatch': role_spec},
    })

    if not leg:
        insert_with_id(data)
        return

    leg.setdefault('old_roles', {})
    stored_term = leg['roles'][0]['term']

    if stored_term == prev_term:
        # Move to old
        leg['old_roles'][stored_term] = leg['roles']
    elif stored_term == next_term:
        # Stored roles are newer: file the incoming roles under their own
        # term and keep the stored ones as the live roles.
        leg['old_roles'][term] = data['roles']
        data['roles'] = leg['roles']

    update(leg, data, db.legislators)
def import_committees(state, data_dir):
    """Import committees for ``state`` and link them to legislators.

    When ``<data_dir>/<state>/committees/*.json`` matches no files, committee
    documents are derived from the "committee member" roles of legislators
    that have a role in the current term.  Otherwise each file is inserted
    into or merged with ``db.committees``, every named member is resolved
    through ``get_legislator_id``, and the matched legislator gains a
    "committee member" role for the current term unless one already exists.
    Finishes by calling ``link_parents`` and ``ensure_indexes``.
    """
    data_dir = os.path.join(data_dir, state)
    pattern = os.path.join(data_dir, "committees", "*.json")

    meta = db.metadata.find_one({"_id": state})
    # The last term in the metadata, and its last session, are used as current.
    current_term = meta["terms"][-1]["name"]
    current_session = meta["terms"][-1]["sessions"][-1]

    paths = glob.glob(pattern)

    if not paths:
        # Not standalone committees
        for legislator in db.legislators.find(
            {"roles": {"$elemMatch": {"term": current_term, "state": state}}}
        ):
            for role in legislator["roles"]:
                if role["type"] == "committee member" and "committee_id" not in role:
                    spec = {
                        "state": role["state"],
                        "chamber": role["chamber"],
                        "committee": role["committee"],
                    }
                    if "subcommittee" in role:
                        spec["subcommittee"] = role["subcommittee"]

                    committee = db.committees.find_one(spec)

                    if not committee:
                        committee = spec
                        committee["_type"] = "committee"
                        committee["members"] = []
                        committee["sources"] = []
                        insert_with_id(committee)

                    # for/else: the else body runs only when the legislator
                    # is not already listed as a member.
                    for member in committee["members"]:
                        if member["leg_id"] == legislator["leg_id"]:
                            break
                    else:
                        committee["members"].append(
                            {
                                "name": legislator["full_name"],
                                "leg_id": legislator["leg_id"],
                                "role": "member",
                            }
                        )
                        db.committees.save(committee, safe=True)

                        role["committee_id"] = committee["_id"]
                        db.legislators.save(legislator, safe=True)

    for path in paths:
        with open(path) as f:
            data = prepare_obj(json.load(f))

        spec = {"state": state, "chamber": data["chamber"], "committee": data["committee"]}
        if "subcommittee" in data:
            spec["subcommittee"] = data["subcommittee"]

        committee = db.committees.find_one(spec)

        if not committee:
            insert_with_id(data)
            committee = data
        else:
            update(committee, data, db.committees)

        for member in committee["members"]:
            if not member["name"]:
                continue

            leg_id = get_legislator_id(state, current_session, data["chamber"], member["name"])

            if not leg_id:
                print("No matches for %s" % member["name"].encode("ascii", "ignore"))
                member["leg_id"] = None
                continue

            legislator = db.legislators.find_one({"_id": leg_id})

            member["leg_id"] = leg_id

            for role in legislator["roles"]:
                # .get(): committee-member roles may carry no committee_id
                # yet (the branch above handles exactly such roles), and
                # indexing them directly would raise KeyError.
                if (
                    role["type"] == "committee member"
                    and role["term"] == current_term
                    and role.get("committee_id") == committee["_id"]
                ):
                    break
            else:
                new_role = {
                    "type": "committee member",
                    "committee": committee["committee"],
                    "term": current_term,
                    "chamber": committee["chamber"],
                    "committee_id": committee["_id"],
                    "state": state,
                }
                if "subcommittee" in committee:
                    new_role["subcommittee"] = committee["subcommittee"]
                legislator["roles"].append(new_role)
                legislator["updated_at"] = datetime.datetime.utcnow()
                db.legislators.save(legislator, safe=True)

        # Persist the leg_id values written onto the member entries.
        db.committees.save(committee, safe=True)

    print("imported %s committee files" % len(paths))

    link_parents(state)

    ensure_indexes()
def import_bills(state, data_dir):
    """Load every scraped bill file for ``state`` into ``db.bills``.

    Files are read from ``<data_dir>/<state>/bills/*.json``.  Sponsors and
    votes are tagged with legislator ids, the bill's ``_term`` is derived
    from its session, titles found on versions are merged into
    ``alternate_titles``, and the bill is inserted or merged with the stored
    document.  ``populate_current_fields`` and ``ensure_indexes`` run after
    all files are processed.
    """
    bills_glob = os.path.join(os.path.join(data_dir, state), 'bills', '*.json')

    meta = db.metadata.find_one({'_id': state})

    # Build a session to term mapping
    term_for_session = {}
    for term in meta['terms']:
        term_name = term['name']
        for session in term['sessions']:
            term_for_session[session] = term_name

    for filename in glob.iglob(bills_glob):
        with open(filename) as fh:
            data = prepare_obj(json.load(fh))

        existing = db.bills.find_one({'state': data['state'],
                                      'session': data['session'],
                                      'chamber': data['chamber'],
                                      'bill_id': data['bill_id']})

        # Sponsors are matched without a chamber hint.
        for sponsor in data['sponsors']:
            sponsor['leg_id'] = get_legislator_id(
                state, data['session'], None, sponsor['name'])

        # Replace each plain list of voter names with name/leg_id records.
        for vote in data['votes']:
            for bucket in ('yes_votes', 'no_votes', 'other_votes'):
                resolved = []
                for voter in vote[bucket]:
                    leg_id = get_legislator_id(
                        state, data['session'], vote['chamber'], voter)
                    resolved.append({'name': voter, 'leg_id': leg_id})
                vote[bucket] = resolved

        data['_term'] = term_for_session[data['session']]

        # Merge any version titles into the alternate_titles list
        titles = set(data['alternate_titles'])
        for version in data['versions']:
            for title_key in ('title', '+short_title'):
                if title_key in version:
                    titles.add(version[title_key])
        try:
            # Make sure the primary title isn't included in the
            # alternate title list
            titles.remove(data['title'])
        except KeyError:
            pass
        data['alternate_titles'] = list(titles)

        if existing:
            data['_keywords'] = list(bill_keywords(data))
            update(existing, data, db.bills)
            continue

        # New bill: both timestamps start out as the same value.
        data['created_at'] = datetime.datetime.now()
        data['updated_at'] = data['created_at']
        data['_keywords'] = list(bill_keywords(data))
        insert_with_id(data)

    populate_current_fields(state)
    ensure_indexes()
def import_committees(state, data_dir):
    """Import committees for ``state`` and link them to legislators.

    When ``<data_dir>/<state>/committees/*.json`` matches no files, committee
    documents are derived from the "committee member" roles of legislators
    that have a role in the current term.  Otherwise each file is inserted
    into or merged with ``db.committees`` and every member is matched to a
    legislator by first and last name; a unique match gains a "committee
    member" role for the current term unless one already exists.  Members
    with zero or several matches are reported and skipped.
    """
    data_dir = os.path.join(data_dir, state)
    pattern = os.path.join(data_dir, 'committees', '*.json')

    meta = db.metadata.find_one({'_id': state})
    # The last term listed in the metadata is used as the current one.
    current_term = meta['terms'][-1]['name']

    paths = glob.glob(pattern)

    if not paths:
        # Not standalone committees
        for legislator in db.legislators.find({
                'roles': {'$elemMatch': {'term': current_term,
                                         'state': state}}}):
            for role in legislator['roles']:
                if (role['type'] == 'committee member' and
                        'committee_id' not in role):
                    spec = {'state': role['state'],
                            'chamber': role['chamber'],
                            'committee': role['committee']}
                    if 'subcommittee' in role:
                        spec['subcommittee'] = role['subcommittee']

                    committee = db.committees.find_one(spec)

                    if not committee:
                        committee = spec
                        committee['_type'] = 'committee'
                        committee['members'] = []
                        insert_with_id(committee)

                    # for/else: the else body runs only when the legislator
                    # is not already listed as a member.
                    for member in committee['members']:
                        if member['leg_id'] == legislator['leg_id']:
                            break
                    else:
                        committee['members'].append(
                            {'name': legislator['full_name'],
                             'leg_id': legislator['leg_id'],
                             'role': 'member'})
                        db.committees.save(committee, safe=True)

                        role['committee_id'] = committee['_id']
                        db.legislators.save(legislator, safe=True)

    for path in paths:
        with open(path) as f:
            data = prepare_obj(json.load(f))

        spec = {'state': state,
                'committee': data['committee']}
        if 'subcommittee' in data:
            spec['subcommittee'] = data['subcommittee']

        committee = db.committees.find_one(spec)

        if not committee:
            insert_with_id(data)
            committee = data
        else:
            update(committee, data, db.committees)

        for member in committee['members']:
            if not member['legislator']:
                continue

            (pre, first, last, suff) = name_tools.split(member['legislator'])

            found = db.legislators.find({
                'first_name': first,
                'last_name': last,
                'roles': {'$elemMatch': {'term': current_term,
                                         'state': state}}})

            # Count once instead of issuing the count query twice.
            match_count = found.count()
            if match_count > 1:
                print("Too many matches for %s" % member['legislator'].encode(
                    'ascii', 'ignore'))
                continue
            elif match_count == 0:
                print("No matches for %s" % member['legislator'].encode(
                    'ascii', 'ignore'))
                continue

            legislator = found[0]

            member['leg_id'] = legislator['_id']

            for role in legislator['roles']:
                # .get(): committee-member roles may carry no committee_id
                # yet (the branch above handles exactly such roles), and
                # indexing them directly would raise KeyError.
                if (role['type'] == 'committee member' and
                        role['term'] == current_term and
                        role.get('committee_id') == committee['_id']):
                    break
            else:
                new_role = {'type': 'committee member',
                            'committee': committee['committee'],
                            'term': current_term,
                            'chamber': committee['chamber'],
                            'committee_id': committee['_id'],
                            'state': state}
                if 'subcommittee' in committee:
                    new_role['subcommittee'] = committee['subcommittee']
                legislator['roles'].append(new_role)
                legislator['updated_at'] = datetime.datetime.now()
                db.legislators.save(legislator, safe=True)

        # Persist the leg_id values written onto the member entries.
        db.committees.save(committee, safe=True)

    ensure_indexes()
def test_activate_legislators():
    """``activate_legislators`` only activates open-ended current-term roles.

    Three legislators are stored: one from the previous term, one from the
    current term without an end date, and one from the current term with an
    end date.  Only the second is expected to be marked active and to have
    district, chamber and party copied to the top level.
    """
    # Previous term
    leg1 = {
        "_type": "person",
        "state": "ex",
        "roles": [
            {
                "type": "member",
                "chamber": "upper",
                "state": "ex",
                "term": "2009-2010",
                "district": "1",
                "party": "Democrat",
                "start_date": None,
                "end_date": None,
            }
        ],
    }

    # Current term, no end date
    leg2 = {
        "_type": "person",
        "state": "ex",
        "roles": [
            {
                "type": "member",
                "chamber": "upper",
                "state": "ex",
                "term": "2011-2012",
                "district": "2",
                "party": "Democrat",
                "start_date": None,
                "end_date": None,
            }
        ],
    }

    # Current term, end date
    leg3 = {
        "_type": "person",
        "state": "ex",
        "roles": [
            {
                "type": "member",
                "chamber": "upper",
                "state": "ex",
                "term": "2011-2012",
                "district": "3",
                "party": "Democrat",
                "start_date": None,
                "end_date": datetime.datetime.now(),
            }
        ],
    }

    id1 = utils.insert_with_id(leg1)
    id2 = utils.insert_with_id(leg2)
    id3 = utils.insert_with_id(leg3)

    legislators.activate_legislators("ex", "2011-2012")

    # Re-read the stored document: asserting on the local dict would never
    # observe anything activate_legislators wrote to the database.
    leg1 = db.legislators.find_one({"_id": id1})
    assert "active" not in leg1
    assert "district" not in leg1
    assert "chamber" not in leg1
    assert "party" not in leg1

    leg2 = db.legislators.find_one({"_id": id2})
    assert leg2["active"] == True
    assert leg2["district"] == "2"
    assert leg2["chamber"] == "upper"
    assert leg2["party"] == "Democrat"

    leg3 = db.legislators.find_one({"_id": id3})
    assert "active" not in leg3
    assert "district" not in leg3
    assert "chamber" not in leg3
    assert "party" not in leg3
def test_deactivate_legislators():
    """``deactivate_legislators`` retires previous-term members only.

    A legislator whose single role is from an earlier term must lose the
    active flag and top-level seat fields, with the role moved under
    ``old_roles``; a current-term legislator must be left untouched.
    """

    def active_member(term, district):
        # Build an active upper-chamber Democrat holding one open-ended role.
        return {
            "_type": "person",
            "state": "ex",
            "roles": [
                {
                    "type": "member",
                    "chamber": "upper",
                    "state": "ex",
                    "term": term,
                    "district": district,
                    "party": "Democrat",
                    "start_date": None,
                    "end_date": None,
                }
            ],
            "active": True,
            "district": district,
            "chamber": "upper",
            "party": "Democrat",
        }

    # Previous term
    former = active_member("2009-2010", "1")
    former_roles = former["roles"]

    # Current term, no end date
    sitting = active_member("2011-2012", "2")
    sitting_roles = sitting["roles"]

    former_id = utils.insert_with_id(former)
    sitting_id = utils.insert_with_id(sitting)

    legislators.deactivate_legislators("ex", "2011-2012")

    former = db.legislators.find_one({"_id": former_id})
    assert former["active"] == False
    assert "chamber" not in former
    assert "district" not in former
    assert "party" not in former
    assert former["roles"] == []
    assert former["old_roles"]["2009-2010"] == former_roles

    sitting = db.legislators.find_one({"_id": sitting_id})
    assert sitting["active"] == True
    assert sitting["chamber"] == "upper"
    assert sitting["district"] == "2"
    assert sitting["party"] == "Democrat"
    assert sitting["roles"] == sitting_roles
    assert "old_roles" not in sitting