def parse_committee_names(options):
    """
    Bring the Committee table in line with committees-current.yaml.

    Every committee listed in the file (and each of its subcommittees) is
    looked up by code, created if missing, refreshed from the file and
    saved.  Committees still flagged non-obsolete in the database but not
    present in the file are then marked obsolete.

    The whole step is skipped when File.objects.is_changed() reports no
    change for the file and options.force is falsy.

    options -- object exposing a `force` attribute.
    Returns None.
    """
    log.info('Processing committees')
    COMMITTEES_FILE = settings.CONGRESS_PROJECT_PATH + '/congress-legislators/committees-current.yaml'

    # Guard clause: bail out early unless the file changed or a run is forced.
    if not (File.objects.is_changed(COMMITTEES_FILE) or options.force):
        log.info('File %s was not changed' % COMMITTEES_FILE)
        return

    entries = yaml_load(COMMITTEES_FILE)
    progress = Progress(total=len(entries))
    seen_ids = set()

    for entry in entries:
        parent_code = entry["thomas_id"]
        try:
            parent = Committee.objects.get(code=parent_code)
        except Committee.DoesNotExist:
            print("New committee:", parent_code)
            parent = Committee(code=parent_code)

        parent.committee_type = TYPE_MAPPING[entry["type"]]
        parent.name = entry["name"]
        parent.url = entry.get("url", None)
        parent.obsolete = False
        parent.committee = None  # top-level committees have no parent
        parent.jurisdiction = entry.get("jurisdiction")
        parent.jurisdiction_link = entry.get("jurisdiction_source")
        parent.save()
        seen_ids.add(parent.id)

        for sub_entry in entry.get('subcommittees', []):
            # A subcommittee's code is the parent's code followed by its own.
            sub_code = parent_code + sub_entry["thomas_id"]
            try:
                child = Committee.objects.get(code=sub_code)
            except Committee.DoesNotExist:
                print("New subcommittee:", sub_code)
                child = Committee(code=sub_code)

            child.name = sub_entry["name"]
            child.url = sub_entry.get("url", None)
            # NOTE(review): the parent sets `committee_type`; this sets
            # `type` -- confirm which attribute is intended.
            child.type = None
            child.committee = parent
            child.obsolete = False
            child.save()
            seen_ids.add(child.id)

        # One tick per top-level committee (total is len(entries)).
        progress.tick()

    # Anything still flagged non-obsolete in the database that we did not
    # see in the file gets marked obsolete.
    stale = Committee.objects.filter(obsolete=False).exclude(id__in=seen_ids)
    if len(stale) > 0:
        stale_codes = ", ".join(c.code for c in stale)
        print("Marking obsolete:", stale_codes)
        stale.update(obsolete=True)

    File.objects.save_file(COMMITTEES_FILE)
def parse_committee_names(options):
    """
    Load committee and subcommittee names from committees-current.yaml
    into Committee objects, and mark committees missing from the file as
    obsolete.

    Nothing is loaded when File.objects.is_changed() says the file is
    unchanged and options.force is falsy.

    options -- object exposing a `force` attribute.
    Returns None.
    """

    def fetch_or_new(code, label):
        # Return the stored Committee for `code`, or an unsaved new one
        # (announcing it on stdout) when none exists yet.
        try:
            return Committee.objects.get(code=code)
        except Committee.DoesNotExist:
            print(label, code)
            return Committee(code=code)

    log.info('Processing committees')
    COMMITTEES_FILE = settings.CONGRESS_PROJECT_PATH + '/congress-legislators/committees-current.yaml'

    if File.objects.is_changed(COMMITTEES_FILE) or options.force:
        records = yaml_load(COMMITTEES_FILE)
        count = len(records)
        progress = Progress(total=count)
        seen = set()

        for record in records:
            top = fetch_or_new(record["thomas_id"], "New committee:")
            top.committee_type = TYPE_MAPPING[record["type"]]
            top.name = record["name"]
            top.url = record.get("url", None)
            top.obsolete = False
            top.committee = None
            top.jurisdiction = record.get("jurisdiction")
            top.jurisdiction_link = record.get("jurisdiction_source")
            top.save()
            seen.add(top.id)

            for sub_record in record.get('subcommittees', []):
                # Subcommittee codes are parent code + subcommittee code.
                sub = fetch_or_new(
                    record["thomas_id"] + sub_record["thomas_id"],
                    "New subcommittee:")
                sub.name = sub_record["name"]
                sub.url = sub_record.get("url", None)
                # NOTE(review): `type` here vs `committee_type` on the
                # parent -- confirm this is the intended attribute.
                sub.type = None
                sub.committee = top
                sub.obsolete = False
                sub.save()
                seen.add(sub.id)

            progress.tick()

        # Look for non-obsolete committees in the database that were not
        # present in the file and flag them obsolete.
        leftovers = Committee.objects.filter(obsolete=False)
        leftovers = leftovers.exclude(id__in=seen)
        if len(leftovers) > 0:
            print("Marking obsolete:", ", ".join(c.code for c in leftovers))
            leftovers.update(obsolete=True)

        File.objects.save_file(COMMITTEES_FILE)
    else:
        log.info('File %s was not changed' % COMMITTEES_FILE)
def main(options):
    """
    Process committees, subcommittees and members of current congress
    committees.

    Steps, each skipped when File.objects.is_changed() reports the input
    file unchanged and options.force is falsy:

    1. committees-current.yaml -> Committee rows (committees missing from
       the file are marked obsolete).
    2. committee-membership-current.yaml -> CommitteeMember rows (all
       existing rows are deleted and rebuilt).

    A third step (committee schedule) follows an unconditional ``return``
    and is therefore never executed.

    options -- object exposing `force` (and `disable_events`, read only by
               the unreachable schedule step).
    Returns None.
    """
    BASE_PATH = settings.CONGRESS_LEGISLATORS_PATH

    meeting_processor = CommitteeMeetingProcessor()

    # ------------------------------------------------------------------
    # Committees and subcommittees
    # ------------------------------------------------------------------
    log.info('Processing committees')
    COMMITTEES_FILE = BASE_PATH + 'committees-current.yaml'

    if not File.objects.is_changed(COMMITTEES_FILE) and not options.force:
        log.info('File %s was not changed' % COMMITTEES_FILE)
    else:
        tree = yaml_load(COMMITTEES_FILE)
        total = len(tree)
        progress = Progress(total=total)
        seen_committees = set()
        for committee in tree:
            try:
                cobj = Committee.objects.get(code=committee["thomas_id"])
            except Committee.DoesNotExist:
                # Single-argument print(): same output on Python 2 and 3.
                print("New committee: %s" % (committee["thomas_id"],))
                cobj = Committee(code=committee["thomas_id"])

            cobj.committee_type = TYPE_MAPPING[committee["type"]]
            cobj.name = committee["name"]
            cobj.url = committee.get("url", None)
            cobj.obsolete = False
            cobj.committee = None
            cobj.save()
            seen_committees.add(cobj.id)

            for subcom in committee.get('subcommittees', []):
                # Subcommittee code = parent code + subcommittee code.
                code = committee["thomas_id"] + subcom["thomas_id"]
                try:
                    sobj = Committee.objects.get(code=code)
                except Committee.DoesNotExist:
                    print("New subcommittee: %s" % (code,))
                    sobj = Committee(code=code)

                sobj.name = subcom["name"]
                sobj.url = subcom.get("url", None)
                # NOTE(review): parent uses `committee_type`; confirm that
                # `type` is the intended attribute here.
                sobj.type = None
                sobj.committee = cobj
                sobj.obsolete = False
                sobj.save()
                seen_committees.add(sobj.id)

            progress.tick()

        # Check for non-obsolete committees in the database that aren't in
        # our file.
        other_committees = Committee.objects.filter(obsolete=False).exclude(id__in=seen_committees)
        if len(other_committees) > 0:
            print("Marking obsolete: %s" % ", ".join(c.code for c in other_committees))
            other_committees.update(obsolete=True)

        File.objects.save_file(COMMITTEES_FILE)

    # ------------------------------------------------------------------
    # Committee membership
    # ------------------------------------------------------------------
    log.info('Processing committee members')
    MEMBERS_FILE = BASE_PATH + 'committee-membership-current.yaml'
    file_changed = File.objects.is_changed(MEMBERS_FILE)

    if not file_changed and not options.force:
        log.info('File %s was not changed' % MEMBERS_FILE)
    else:
        # map THOMAS IDs to GovTrack IDs
        y = yaml_load(BASE_PATH + "legislators-current.yaml")
        person_id_map = {}
        for m in y:
            if "id" in m and "govtrack" in m["id"] and "thomas" in m["id"]:
                person_id_map[m["id"]["thomas"]] = m["id"]["govtrack"]

        # load committee members
        tree = yaml_load(MEMBERS_FILE)
        total = len(tree)
        progress = Progress(total=total, name='committees')

        # We can delete CommitteeMember objects because we don't have
        # any foreign keys to them.
        CommitteeMember.objects.all().delete()

        # Process committee nodes
        for committee, members in tree.items():
            if committee[0] == "H":
                continue  # House data is out of date

            try:
                cobj = Committee.objects.get(code=committee)
            except Committee.DoesNotExist:
                print("Committee not found: %s" % (committee,))
                continue

            # Process members of current committee node
            for member in members:
                mobj = CommitteeMember()
                # Raises KeyError if the member's THOMAS id is not in
                # legislators-current.yaml.
                mobj.person = Person.objects.get(id=person_id_map[member["thomas"]])
                mobj.committee = cobj
                if "title" in member:
                    mobj.role = ROLE_MAPPING[member["title"]]
                mobj.save()

            progress.tick()

        File.objects.save_file(MEMBERS_FILE)

    # NOTE(review): unconditional return -- everything below is dead code.
    # Presumably the schedule import was disabled on purpose; confirm
    # before removing or re-enabling it.
    return

    # ------------------------------------------------------------------
    # Committee schedule (unreachable)
    # ------------------------------------------------------------------
    log.info('Processing committee schedule')
    SCHEDULE_FILE = 'data/us/112/committeeschedule.xml'
    file_changed = File.objects.is_changed(SCHEDULE_FILE)

    if not file_changed and not options.force:
        log.info('File %s was not changed' % SCHEDULE_FILE)
    else:
        tree = etree.parse(SCHEDULE_FILE)

        # We have to clear out all CommitteeMeeting objects when we refresh
        # because we have no unique identifier in the upstream data for a
        # meeting. We might use the meeting's committee & date as an
        # identifier, but since meeting times can change this might have
        # awkward consequences for the end user if we even attempted to
        # track that.
        CommitteeMeeting.objects.all().delete()

        # Process committee event nodes
        for meeting in tree.xpath('/committee-schedule/meeting'):
            try:
                mobj = meeting_processor.process(CommitteeMeeting(), meeting)
                mobj.save()

                mobj.bills.clear()
                for bill_node in meeting.xpath('bill'):
                    # Keep the XML node and the model object in separate
                    # names instead of rebinding the loop variable.
                    bill = Bill.objects.get(
                        congress=bill_node.get("session"),
                        bill_type=BillType.by_xml_code(bill_node.get("type")),
                        number=int(bill_node.get("number")))
                    mobj.bills.add(bill)
            except Committee.DoesNotExist:
                log.error('Could not load Committee object for meeting %s' % meeting_processor.display_node(meeting))

        # The flag does not change per committee, so test it once.
        if not options.disable_events:
            for committee in Committee.objects.all():
                committee.create_events()

        File.objects.save_file(SCHEDULE_FILE)
def main(options):
    """
    Process committees, subcommittees, members and meetings of current
    congress committees.

    Steps, each skipped when File.objects.is_changed() reports the input
    file unchanged and options.force is falsy:

    1. committees-current.yaml -> Committee rows (committees missing from
       the file are marked obsolete).
    2. committee-membership-current.yaml -> CommitteeMember rows (all
       existing rows are deleted and rebuilt).
    3. data/congress/committee_meetings_{house,senate}.json ->
       CommitteeMeeting rows matched by GUID, with their bills attached;
       committee events are then regenerated unless
       options.disable_events is set.

    options -- object exposing `force` and `disable_events`.
    Returns None.
    """
    BASE_PATH = settings.CONGRESS_LEGISLATORS_PATH

    meeting_processor = CommitteeMeetingProcessor()

    # ------------------------------------------------------------------
    # Committees and subcommittees
    # ------------------------------------------------------------------
    log.info('Processing committees')
    COMMITTEES_FILE = BASE_PATH + 'committees-current.yaml'

    if not File.objects.is_changed(COMMITTEES_FILE) and not options.force:
        log.info('File %s was not changed' % COMMITTEES_FILE)
    else:
        tree = yaml_load(COMMITTEES_FILE)
        total = len(tree)
        progress = Progress(total=total)
        seen_committees = set()
        for committee in tree:
            try:
                cobj = Committee.objects.get(code=committee["thomas_id"])
            except Committee.DoesNotExist:
                # Single-argument print(): same output on Python 2 and 3.
                print("New committee: %s" % (committee["thomas_id"],))
                cobj = Committee(code=committee["thomas_id"])

            cobj.committee_type = TYPE_MAPPING[committee["type"]]
            cobj.name = committee["name"]
            cobj.url = committee.get("url", None)
            cobj.obsolete = False
            cobj.committee = None
            cobj.jurisdiction = committee.get("jurisdiction")
            cobj.jurisdiction_link = committee.get("jurisdiction_source")
            cobj.save()
            seen_committees.add(cobj.id)

            for subcom in committee.get('subcommittees', []):
                # Subcommittee code = parent code + subcommittee code.
                code = committee["thomas_id"] + subcom["thomas_id"]
                try:
                    sobj = Committee.objects.get(code=code)
                except Committee.DoesNotExist:
                    print("New subcommittee: %s" % (code,))
                    sobj = Committee(code=code)

                sobj.name = subcom["name"]
                sobj.url = subcom.get("url", None)
                # NOTE(review): parent uses `committee_type`; confirm that
                # `type` is the intended attribute here.
                sobj.type = None
                sobj.committee = cobj
                sobj.obsolete = False
                sobj.save()
                seen_committees.add(sobj.id)

            progress.tick()

        # Check for non-obsolete committees in the database that aren't in
        # our file.
        other_committees = Committee.objects.filter(obsolete=False).exclude(
            id__in=seen_committees)
        if len(other_committees) > 0:
            print("Marking obsolete: %s" % ", ".join(c.code for c in other_committees))
            other_committees.update(obsolete=True)

        File.objects.save_file(COMMITTEES_FILE)

    # ------------------------------------------------------------------
    # Committee membership
    # ------------------------------------------------------------------
    log.info('Processing committee members')
    MEMBERS_FILE = BASE_PATH + 'committee-membership-current.yaml'
    file_changed = File.objects.is_changed(MEMBERS_FILE)

    if not file_changed and not options.force:
        log.info('File %s was not changed' % MEMBERS_FILE)
    else:
        # map THOMAS IDs to GovTrack IDs
        y = yaml_load(BASE_PATH + "legislators-current.yaml")
        person_id_map = {}
        for m in y:
            if "id" in m and "govtrack" in m["id"] and "thomas" in m["id"]:
                person_id_map[m["id"]["thomas"]] = m["id"]["govtrack"]

        # load committee members
        tree = yaml_load(MEMBERS_FILE)
        total = len(tree)
        progress = Progress(total=total, name='committees')

        # We can delete CommitteeMember objects because we don't have
        # any foreign keys to them.
        CommitteeMember.objects.all().delete()

        # Process committee nodes
        for committee, members in tree.items():
            try:
                cobj = Committee.objects.get(code=committee)
            except Committee.DoesNotExist:
                print("Committee not found: %s" % (committee,))
                continue

            # Process members of current committee node
            for member in members:
                mobj = CommitteeMember()
                # Raises KeyError if the member's THOMAS id is not in
                # legislators-current.yaml.
                mobj.person = Person.objects.get(
                    id=person_id_map[member["thomas"]])
                mobj.committee = cobj
                if "title" in member:
                    mobj.role = ROLE_MAPPING[member["title"]]
                mobj.save()

            progress.tick()

        File.objects.save_file(MEMBERS_FILE)

    # ------------------------------------------------------------------
    # Committee meetings
    # ------------------------------------------------------------------
    log.info('Processing committee schedule')
    for chamber in ("house", "senate"):
        meetings_file = 'data/congress/committee_meetings_%s.json' % chamber
        file_changed = File.objects.is_changed(meetings_file)

        if not file_changed and not options.force:
            log.info('File %s was not changed' % meetings_file)
        else:
            # Close the file deterministically instead of leaking the
            # handle returned by a bare open().
            with open(meetings_file) as meetings_fp:
                meetings = json.load(meetings_fp)

            # Process committee event nodes
            for meeting in meetings:
                try:
                    # Associate it with an existing meeting object if GUID
                    # is already known. Must get it like this, vs just
                    # assigning the ID as we do in other parsers, because
                    # of the auto_now_add created field, which otherwise
                    # misbehaves.
                    try:
                        mobj = CommitteeMeeting.objects.get(
                            guid=meeting['guid'])
                    except CommitteeMeeting.DoesNotExist:
                        mobj = CommitteeMeeting()

                    # Parse.
                    mobj = meeting_processor.process(mobj, meeting)

                    # Attach the meeting to the subcommittee if set.
                    if mobj.subcommittee:
                        mobj.committee = Committee.objects.get(
                            code=mobj.committee.code + mobj.subcommittee)

                    mobj.save()

                    mobj.bills.clear()
                    for bill_id in meeting["bill_ids"]:
                        # Keep the id string and the model object in
                        # separate names instead of rebinding the loop
                        # variable.
                        try:
                            bill_type, bill_num, bill_cong = re.match(
                                r"([a-z]+)(\d+)-(\d+)$", bill_id).groups()
                            bill = Bill.objects.get(
                                congress=bill_cong,
                                bill_type=BillType.by_slug(bill_type),
                                number=int(bill_num))
                            mobj.bills.add(bill)
                        except AttributeError:
                            pass  # regex failed
                        except common.enum.NotFound:
                            pass  # invalid bill type code in source data
                        except Bill.DoesNotExist:
                            pass  # we don't know about bill yet
                except Committee.DoesNotExist:
                    log.error(
                        'Could not load Committee object for meeting %s' %
                        meeting_processor.display_node(meeting))

            # The flag does not change per committee, so test it once.
            if not options.disable_events:
                for committee in Committee.objects.all():
                    committee.create_events()

            File.objects.save_file(meetings_file)
def main(options):
    """
    Process committees, subcommittees, members and meetings of current
    congress committees.

    Each step below is skipped when File.objects.is_changed() reports the
    input file unchanged and options.force is falsy:

    1. committees-current.yaml -> Committee rows (committees missing from
       the file are marked obsolete).
    2. committee-membership-current.yaml -> CommitteeMember rows (all
       existing rows are deleted and rebuilt).
    3. data/congress/committee_meetings_{house,senate}.json ->
       CommitteeMeeting rows matched by GUID, with their bills attached;
       committee events are then regenerated unless
       options.disable_events is set.

    options -- object exposing `force` and `disable_events`.
    Returns None.
    """
    BASE_PATH = settings.CONGRESS_LEGISLATORS_PATH

    meeting_processor = CommitteeMeetingProcessor()

    # ---- Committees and subcommittees --------------------------------
    log.info('Processing committees')
    COMMITTEES_FILE = BASE_PATH + 'committees-current.yaml'

    if not File.objects.is_changed(COMMITTEES_FILE) and not options.force:
        log.info('File %s was not changed' % COMMITTEES_FILE)
    else:
        tree = yaml_load(COMMITTEES_FILE)
        total = len(tree)
        progress = Progress(total=total)
        seen_committees = set()
        for committee in tree:
            try:
                cobj = Committee.objects.get(code=committee["thomas_id"])
            except Committee.DoesNotExist:
                # Single-argument print(): same output on Python 2 and 3.
                print("New committee: %s" % (committee["thomas_id"],))
                cobj = Committee(code=committee["thomas_id"])

            cobj.committee_type = TYPE_MAPPING[committee["type"]]
            cobj.name = committee["name"]
            cobj.url = committee.get("url", None)
            cobj.obsolete = False
            cobj.committee = None
            cobj.jurisdiction = committee.get("jurisdiction")
            cobj.jurisdiction_link = committee.get("jurisdiction_source")
            cobj.save()
            seen_committees.add(cobj.id)

            for subcom in committee.get('subcommittees', []):
                # Subcommittee code = parent code + subcommittee code.
                code = committee["thomas_id"] + subcom["thomas_id"]
                try:
                    sobj = Committee.objects.get(code=code)
                except Committee.DoesNotExist:
                    print("New subcommittee: %s" % (code,))
                    sobj = Committee(code=code)

                sobj.name = subcom["name"]
                sobj.url = subcom.get("url", None)
                # NOTE(review): parent uses `committee_type`; confirm that
                # `type` is the intended attribute here.
                sobj.type = None
                sobj.committee = cobj
                sobj.obsolete = False
                sobj.save()
                seen_committees.add(sobj.id)

            progress.tick()

        # Check for non-obsolete committees in the database that aren't in
        # our file.
        other_committees = Committee.objects.filter(obsolete=False).exclude(id__in=seen_committees)
        if len(other_committees) > 0:
            print("Marking obsolete: %s" % ", ".join(c.code for c in other_committees))
            other_committees.update(obsolete=True)

        File.objects.save_file(COMMITTEES_FILE)

    # ---- Committee membership ----------------------------------------
    log.info('Processing committee members')
    MEMBERS_FILE = BASE_PATH + 'committee-membership-current.yaml'
    file_changed = File.objects.is_changed(MEMBERS_FILE)

    if not file_changed and not options.force:
        log.info('File %s was not changed' % MEMBERS_FILE)
    else:
        # map THOMAS IDs to GovTrack IDs
        y = yaml_load(BASE_PATH + "legislators-current.yaml")
        person_id_map = {}
        for m in y:
            if "id" in m and "govtrack" in m["id"] and "thomas" in m["id"]:
                person_id_map[m["id"]["thomas"]] = m["id"]["govtrack"]

        # load committee members
        tree = yaml_load(MEMBERS_FILE)
        total = len(tree)
        progress = Progress(total=total, name='committees')

        # We can delete CommitteeMember objects because we don't have
        # any foreign keys to them.
        CommitteeMember.objects.all().delete()

        # Process committee nodes
        for committee, members in tree.items():
            try:
                cobj = Committee.objects.get(code=committee)
            except Committee.DoesNotExist:
                print("Committee not found: %s" % (committee,))
                continue

            # Process members of current committee node
            for member in members:
                mobj = CommitteeMember()
                # Raises KeyError if the member's THOMAS id is not in
                # legislators-current.yaml.
                mobj.person = Person.objects.get(id=person_id_map[member["thomas"]])
                mobj.committee = cobj
                if "title" in member:
                    mobj.role = ROLE_MAPPING[member["title"]]
                mobj.save()

            progress.tick()

        File.objects.save_file(MEMBERS_FILE)

    # ---- Committee meetings ------------------------------------------
    log.info('Processing committee schedule')
    for chamber in ("house", "senate"):
        meetings_file = 'data/congress/committee_meetings_%s.json' % chamber
        file_changed = File.objects.is_changed(meetings_file)

        if not file_changed and not options.force:
            log.info('File %s was not changed' % meetings_file)
        else:
            # Use a context manager so the file handle is closed rather
            # than leaked by a bare open().
            with open(meetings_file) as meetings_fp:
                meetings = json.load(meetings_fp)

            # Process committee event nodes
            for meeting in meetings:
                try:
                    # Associate it with an existing meeting object if GUID
                    # is already known. Must get it like this, vs just
                    # assigning the ID as we do in other parsers, because
                    # of the auto_now_add created field, which otherwise
                    # misbehaves.
                    try:
                        mobj = CommitteeMeeting.objects.get(guid=meeting['guid'])
                    except CommitteeMeeting.DoesNotExist:
                        mobj = CommitteeMeeting()

                    # Parse.
                    mobj = meeting_processor.process(mobj, meeting)

                    # Attach the meeting to the subcommittee if set.
                    if mobj.subcommittee:
                        mobj.committee = Committee.objects.get(code=mobj.committee.code + mobj.subcommittee)

                    mobj.save()

                    mobj.bills.clear()
                    for bill_id in meeting["bill_ids"]:
                        # Separate names for the id string and the model
                        # object; the original rebound the loop variable.
                        try:
                            bill_type, bill_num, bill_cong = re.match(r"([a-z]+)(\d+)-(\d+)$", bill_id).groups()
                            bill = Bill.objects.get(congress=bill_cong, bill_type=BillType.by_slug(bill_type), number=int(bill_num))
                            mobj.bills.add(bill)
                        except AttributeError:
                            pass  # regex failed
                        except common.enum.NotFound:
                            pass  # invalid bill type code in source data
                        except Bill.DoesNotExist:
                            pass  # we don't know about bill yet
                except Committee.DoesNotExist:
                    log.error('Could not load Committee object for meeting %s' % meeting_processor.display_node(meeting))

            # The flag does not change per committee, so test it once.
            if not options.disable_events:
                for committee in Committee.objects.all():
                    committee.create_events()

            File.objects.save_file(meetings_file)