def test_abbr_to_jid():
    """Check the jurisdiction id built for a state, a district and a territory."""
    expected_by_abbr = {
        "nc": "ocd-jurisdiction/country:us/state:nc/government",
        "dc": "ocd-jurisdiction/country:us/district:dc/government",
        "pr": "ocd-jurisdiction/country:us/territory:pr/government",
    }
    for abbr, expected_jid in expected_by_abbr.items():
        assert abbr_to_jid(abbr) == expected_jid
def yield_state_sessions(state: str, session: Optional[str]):
    """
    Yield ``(state, session)`` pairs selected by the given options.

    * ``state == "all"``: for every entry of ``states`` for which
      ``sessions_with_bills`` returns something, the last identifier in
      sorted order.
    * ``state == "all_sessions"``: every identifier of every such state.
    * a state plus a truthy ``session``: that single pair, unchanged.
    * a state alone: every identifier for that state, in sorted order.

    In the two "all" modes the yielded state is the lower-cased abbreviation.
    """
    if state in ("all", "all_sessions"):
        every_session = state == "all_sessions"
        for state_obj in states:
            identifiers = sorted(
                s.identifier
                for s in sessions_with_bills(abbr_to_jid(state_obj.abbr))
            )
            if not identifiers:
                continue
            abbr = state_obj.abbr.lower()
            if every_session:
                for identifier in identifiers:
                    yield abbr, identifier
            else:
                # only the last identifier in sorted order
                yield abbr, identifiers[-1]
        return

    if session:
        # state and session were both given: nothing to look up
        yield state, session
        return

    # single state, no session: walk all of its sessions
    identifiers = sorted(
        s.identifier for s in sessions_with_bills(abbr_to_jid(state))
    )
    for identifier in identifiers:
        yield state, identifier
def get_available_sessions(state, updated_since=0):
    """
    Return the sorted session identifiers for ``state``.

    With a truthy ``updated_since`` (a number of days) only the sessions of
    bills whose ``updated_at`` is within that many days of now are returned;
    otherwise every LegislativeSession of the jurisdiction is returned.
    """
    jid = abbr_to_jid(state)

    if not updated_since:
        return sorted(
            s.identifier
            for s in LegislativeSession.objects.filter(jurisdiction_id=jid)
        )

    cutoff = datetime.datetime.now() - datetime.timedelta(days=updated_since)
    recent_bills = Bill.objects.filter(
        legislative_session__jurisdiction_id=jid,
        updated_at__gte=cutoff,
    )
    identifiers = recent_bills.values_list(
        "legislative_session__identifier", flat=True
    ).distinct()
    return sorted(identifiers)
def handle(self, *args, **options):
    """
    Create data quality reports according to the ``state`` option.

    "all" reports on the first session returned for every state,
    "all_sessions" on every session of every state; any other value is
    treated as one state whose available sessions are all processed.
    """
    from django.conf import settings

    print("DEBUG", settings.DEBUG)
    requested = options["state"]

    if requested not in ("all", "all_sessions"):
        for identifier in get_available_sessions(requested):
            create_dqr(requested, identifier)
        return

    for state_obj in states:
        found = sessions_with_bills(abbr_to_jid(state_obj.abbr))
        if len(found) == 0:
            # nothing to report on for this state
            continue
        abbr = state_obj.abbr.lower()
        if requested == "all_sessions":
            for found_session in found:
                create_dqr(abbr, found_session.identifier)
        else:
            create_dqr(abbr, found[0].identifier)
def bill_detail(
    request,
    abbr=None,
    session=None,
    bill_id=None,
    chamber=None,
    billy_bill_id=None,
):
    """
    Return one bill (votes included) as JSON, or a 404 when nothing matches.

    The bill is looked up by ``abbr``/``session``/``bill_id`` when ``abbr`` is
    given, otherwise by its legacy id ``billy_bill_id``. ``chamber`` optionally
    narrows the match by the originating organization's classification.
    """
    # NOTE(review): when neither abbr nor billy_bill_id is supplied, `lookup`
    # is never bound and the code below raises; presumably routing prevents it.
    if abbr:
        lookup = {
            'legislative_session__jurisdiction_id': abbr_to_jid(abbr),
            'legislative_session__identifier': session,
            'identifier': bill_id,
        }
    elif billy_bill_id:
        lookup = {'legacy_mapping__legacy_id': billy_bill_id}

    if chamber:
        # for ne and dc a request for 'upper' is answered with 'legislature'
        if chamber == 'upper' and abbr in ('ne', 'dc'):
            chamber = 'legislature'
        lookup['from_organization__classification'] = chamber

    candidates = bill_qs(include_votes=True)
    found = get_object_or_404(candidates, **lookup)
    return JsonResponse(convert_bill(found, include_votes=True))
def people_matcher(request, state, session=None):
    """
    Render the name-matching page for one state.

    Lists the UnmatchedName rows with status "U" for the chosen session
    (most sponsorships first) together with the people whose current
    jurisdiction is this state.
    """
    jid = abbr_to_jid(state)
    all_sessions = sessions_with_bills(jid)

    # NOTE(review): whenever sessions_with_bills returns anything, the
    # `session` argument is ignored in favour of the first entry - confirm
    # that this is intended.
    if not all_sessions:
        active_session = get_object_or_404(
            LegislativeSession, identifier=session, jurisdiction_id=jid
        )
    else:
        active_session = all_sessions[0]

    unmatched = UnmatchedName.objects.filter(
        session_id=active_session, status="U"
    ).order_by("-sponsorships_count")
    state_sponsors = Person.objects.filter(current_jurisdiction_id=jid)

    return render(
        request,
        "people_admin/people_matcher.html",
        {
            "state": state,
            "session": active_session,
            "all_sessions": all_sessions,
            "unmatched": unmatched,
            "state_sponsors": state_sponsors,
            "unmatched_total": unmatched.count(),
        },
    )
def update_unmatched(abbr: str, session: str) -> int:
    """
    Synchronise the UnmatchedName rows of one session with the current checks.

    Names come from ``check_sponsorships`` and ``check_votes``. Rows whose
    name is no longer reported are deleted, the rest are created or have
    their counts refreshed. Returns the number of names processed.
    """
    session_obj = LegislativeSession.objects.get(
        jurisdiction_id=abbr_to_jid(abbr), identifier=session
    )

    sponsorship_counts = check_sponsorships(session_obj)
    vote_counts = check_votes(session_obj)
    current_names = set(sponsorship_counts) | set(vote_counts)

    # drop rows for names that are no longer reported
    stale = UnmatchedName.objects.filter(session=session_obj).exclude(
        name__in=current_names
    )
    stale.delete()

    for name in current_names:
        # only the counts live in `defaults`, so an existing row keeps its
        # status/match while its numbers are refreshed
        counts = {
            "sponsorships_count": sponsorship_counts.get(name, 0),
            "votes_count": vote_counts.get(name, 0),
        }
        UnmatchedName.objects.update_or_create(
            session=session_obj, name=name, defaults=counts
        )

    return len(current_names)
def get_filter_options(self, state):
    """
    Collect the choices offered by the bill search filters for ``state``.

    Returns a dict with "chambers", "sessions" and "sponsors" mappings plus
    sorted "classifications" and "subjects" lists drawn from the state's bills.
    """
    jid = abbr_to_jid(state)
    state_bills = Bill.objects.all().filter(
        legislative_session__jurisdiction_id=jid
    )

    chamber_names = {}
    for chamber in get_chambers_from_abbr(state):
        chamber_names[chamber.classification] = chamber.name

    session_names = {}
    for state_session in sessions_with_bills(jid):
        session_names[state_session.identifier] = state_session.name

    sponsors = (
        Person.objects.filter(memberships__organization__jurisdiction_id=jid)
        .order_by("name")
        .distinct()
    )
    sponsor_names = {person.id: person.name for person in sponsors}

    classification_values = (
        state_bills.annotate(type=Unnest("classification", distinct=True))
        .values_list("type", flat=True)
        .distinct()
    )
    subject_values = (
        state_bills.annotate(sub=Unnest("subject", distinct=True))
        .values_list("sub", flat=True)
        .distinct()
    )

    return {
        "chambers": chamber_names,
        "sessions": session_names,
        "sponsors": sponsor_names,
        "classifications": sorted(classification_values),
        "subjects": sorted(subject_values),
    }
def create_dqr(state, session):
    """
    Create or refresh one DataQualityReport per chamber of ``state``.

    Chambers for which ``bill_qs`` counts no bills in ``session`` are skipped.
    """
    for chamber in get_chambers_from_abbr(state):
        print(f"creating report for {chamber} in {state} {session}")
        if bill_qs(state, session, chamber).count() <= 0:
            continue

        per_session = total_bills_per_session(state, session, chamber)
        averages = average_number_data(state, session, chamber)
        versions = bills_versions(state, session, chamber)
        sources = no_sources(state, session, chamber)
        subjects = bill_subjects(state, session, chamber)
        votes = vote_data(state, session, chamber)

        # later parts win on duplicate keys, so the merge order matters
        report_fields = {}
        for part in (per_session, averages, versions, sources, votes, subjects):
            report_fields.update(part)

        # update or save the report
        leg_session = LegislativeSession.objects.get(
            identifier=session, jurisdiction_id=abbr_to_jid(state)
        )
        DataQualityReport.objects.update_or_create(
            session=leg_session,
            chamber=chamber.classification,
            defaults=report_fields,
        )
def legislator_list(request, geo=False):
    """
    Return the legislators matching the request's query string as JSON.

    Optional query parameters ``state``, ``chamber`` and ``district`` narrow
    the result. When ``geo`` is true, ``lat`` and ``long`` are required and
    only people whose post's division boundary contains that point (and whose
    boundary set has not ended) are returned.

    Responds with HTTP 400 when ``geo`` is true and ``lat``/``long`` are
    missing or are not valid numbers.
    """
    abbr = request.GET.get('state')
    chamber = request.GET.get('chamber')
    district = request.GET.get('district')

    filter_params = current_role_filters()

    if geo:
        latitude = request.GET.get('lat')
        longitude = request.GET.get('long')

        if not latitude or not longitude:
            return JsonResponse("Bad Request: must include lat & long",
                                status=400, safe=False)

        # reject non-numeric coordinates with a 400 instead of letting
        # float() raise and surface as a server error
        try:
            latitude = float(latitude)
            longitude = float(longitude)
        except ValueError:
            return JsonResponse("Bad Request: invalid lat, lon",
                                status=400, safe=False)

        today = datetime.date.today().isoformat()
        filter_params += [
            Q(memberships__post__division__geometries__boundary__shape__contains=(
                Point(longitude, latitude)
            )),
            # boundary sets with no end date, or one still in the future
            Q(memberships__post__division__geometries__boundary__set__end_date=None) |
            Q(memberships__post__division__geometries__boundary__set__end_date__gt=today)
        ]

    if abbr:
        jid = abbr_to_jid(abbr)
        filter_params.append(Q(memberships__organization__jurisdiction_id=jid))
    if chamber:
        filter_params.append(Q(memberships__organization__classification=chamber))
    if district:
        filter_params.append(Q(memberships__post__label=district))

    people = person_qs().filter(*filter_params).distinct()

    return JsonResponse([convert_legislator(leg) for leg in people], safe=False)
def create_dqr(state, session):
    """
    Create or refresh one DataQualityReport per chamber of ``state``.

    The session's bills are loaded once through ``load_bills``; chambers with
    no bills originating from them are skipped.
    """
    session_bills = load_bills(state, session)

    for chamber in get_chambers_from_abbr(state):
        if session_bills.filter(from_organization=chamber).count() <= 0:
            continue

        per_session = total_bills_per_session(session_bills, chamber)
        averages = average_number_data(session_bills, chamber)
        versions = bills_versions(session_bills, chamber)
        sources = no_sources(session_bills, chamber)
        subjects = bill_subjects(session_bills, chamber)
        votes = vote_data(session_bills, chamber)

        # later parts win on duplicate keys, so the merge order matters
        report_fields = {}
        for part in (per_session, averages, versions, sources, votes, subjects):
            report_fields.update(part)

        # the report is keyed on the LegislativeSession object
        leg_session = LegislativeSession.objects.get(
            identifier=session, jurisdiction_id=abbr_to_jid(state)
        )
        DataQualityReport.objects.update_or_create(
            session=leg_session,
            chamber=chamber.classification,
            defaults=report_fields,
        )
def bill_qs(state, session, chamber):
    """Return the bills of ``session`` in ``state`` that originate from ``chamber``."""
    criteria = {
        "legislative_session__jurisdiction_id": abbr_to_jid(state),
        "legislative_session__identifier": session,
        "from_organization": chamber,
    }
    return Bill.objects.filter(**criteria)
def unmatched_to_deltas(abbr: str) -> int:
    """
    Turn matched names for ``abbr`` into PersonDelta rows.

    Every UnmatchedName matched to a person contributes an
    ``["append", "other_names", {"name": ...}]`` change for that person. The
    jurisdiction's DeltaSet is emptied and refilled on each call.

    Returns the number of people that received changes (0 when nothing is
    matched, in which case no DeltaSet is touched).
    """
    bot_user, _ = User.objects.get_or_create(username="******")

    matched = list(
        UnmatchedName.objects.filter(
            session__jurisdiction_id=abbr_to_jid(abbr),
            status=NameStatus.MATCHED_PERSON,
            matched_person_id__isnull=False,
        )
    )
    if not matched:
        # nothing to consider, so skip the DeltaSet work entirely
        return 0

    delta_set, _ = DeltaSet.objects.get_or_create(
        name=f"{abbr.upper()} legislator matching",
        pr_url="",
        created_by=bot_user,
    )
    # start from a clean slate before rebuilding the deltas
    delta_set.person_deltas.all().delete()

    changes_by_person = defaultdict(list)
    for entry in matched:
        change = ["append", "other_names", {"name": entry.name}]
        changes_by_person[entry.matched_person_id].append(change)

    for person_id, changes in changes_by_person.items():
        PersonDelta.objects.create(
            person_id=person_id, delta_set=delta_set, data_changes=changes
        )

    return len(changes_by_person)
def dqr_listing(request):
    """
    Render the data quality report listing, one entry per state.

    For each state the first session returned by ``sessions_with_bills`` is
    used; states without one are left out. With two reports the one sorting
    first by chamber is shown as lower and the other as upper; with a single
    report it is shown as lower only when its chamber is "lower", otherwise
    as upper. Any slot without a report is ``None``.
    """
    state_dqr_data = {}

    for state in states:
        try:
            session = sessions_with_bills(abbr_to_jid(state.abbr))[0]
        except (IndexError, KeyError):
            # indexing an empty result raises IndexError; KeyError is kept
            # so anything previously skipped is still skipped
            continue

        dashboards = list(
            DataQualityReport.objects.filter(session=session).order_by("chamber")
        )
        session_name = session.name

        # reset for every state so a state with zero (or an unexpected
        # number of) reports never reuses the previous state's dashboards
        lower_dashboard = None
        upper_dashboard = None

        # if there are two, lower is first (b/c of ordering above),
        # otherwise figure it out
        if len(dashboards) == 2:
            lower_dashboard, upper_dashboard = dashboards
        elif len(dashboards) == 1:
            if dashboards[0].chamber == "lower":
                lower_dashboard = dashboards[0]
            else:
                upper_dashboard = dashboards[0]

        state_dqr_data[state.abbr.lower()] = {
            "state": state.name,
            "session_name": session_name,
            "lower_dashboard": lower_dashboard,
            "upper_dashboard": upper_dashboard,
        }

    return render(request, "dashboards/dqr_listing.html",
                  {"state_dqr_data": state_dqr_data})
def dqr_listing(request):
    """
    Render the data quality report listing with one entry per state.

    Every state gets an entry. When the state has no session with bills, or
    that session has no reports yet, the entry keeps an empty session name
    and empty lists in place of the dashboards.
    """
    state_dqr_data = {}
    for state in states:
        session = sessions_with_bills(abbr_to_jid(state.abbr))
        abbr = state.abbr.lower()
        # placeholders used when nothing is found for this state
        lower_dashboard = []
        upper_dashboard = []
        session_name = ""
        if len(session) > 0:
            # only the first returned session is shown
            dashboards = DataQualityReport.objects.filter(session=session[0])
            if dashboards.count() > 0:
                session_name = session[0].name
                # Nebraska only has one legislature
                # (dc is handled the same way; the single report is shown in
                # the "lower" slot)
                # NOTE(review): the [0] lookups below raise IndexError when
                # reports exist but none has the expected chamber.
                if abbr == "ne" or abbr == "dc":
                    lower_dashboard = dashboards.filter(
                        session=session[0], chamber="legislature")[0]
                else:
                    lower_dashboard = dashboards.filter(session=session[0],
                                                        chamber="lower")[0]
                    # the upper report is optional
                    if dashboards.filter(session=session[0],
                                         chamber="upper").count() > 0:
                        upper_dashboard = dashboards.filter(session=session[0],
                                                            chamber="upper")[0]
        state_dqr_data[abbr] = {
            "state": state.name,
            "session_name": session_name,
            "lower_dashboard": lower_dashboard,
            "upper_dashboard": upper_dashboard,
        }
    return render(request, "dashboards/dqr_listing.html", {
        "state_dqr_data": state_dqr_data,
    })
def get_bills(self, request, state):
    """
    Build the filtered bill queryset for a state's bill search.

    Reads ``query``, ``chamber``, ``session``, ``sponsor``,
    ``classification``, ``subjects`` and ``status`` from the query string.
    A ``query`` that looks like a bill id is matched on the identifier,
    anything else goes through full-text search.

    Returns ``(bills, form)`` where ``form`` echoes the submitted values and
    ``bills`` is ordered by latest action date, newest first.
    """
    jid = abbr_to_jid(state)
    bills = (
        Bill.objects.all()
        .select_related(
            "legislative_session",
            "legislative_session__jurisdiction",
            "billstatus",
        )
        .filter(legislative_session__jurisdiction_id=jid)
    )

    # query parameter filtering
    params = request.GET
    form = {
        "query": params.get("query", ""),
        "chamber": params.get("chamber"),
        "session": params.get("session"),
        "sponsor": params.get("sponsor"),
        "classification": params.get("classification"),
        "subjects": params.getlist("subjects"),
        "status": params.getlist("status"),
    }
    query = form["query"]
    status = form["status"]

    if query:
        looks_like_bill_id = re.match(r"\w{1,3}\s*\d{1,5}", query)
        if looks_like_bill_id:
            bills = bills.filter(identifier=fix_bill_id(query))
        else:
            bills = bills.filter(
                searchable__search_vector=SearchQuery(
                    query, search_type="web", config="english"
                )
            )
    if form["chamber"]:
        bills = bills.filter(from_organization__classification=form["chamber"])
    if form["session"]:
        bills = bills.filter(legislative_session__identifier=form["session"])
    if form["sponsor"]:
        bills = bills.filter(sponsorships__person_id=form["sponsor"])
    if form["classification"]:
        bills = bills.filter(classification__contains=[form["classification"]])
    if form["subjects"]:
        bills = bills.filter(subject__overlap=form["subjects"])

    # at most one status filter applies; lower passage wins over upper
    # passage, which wins over signed
    if "passed-lower-chamber" in status:
        passed_in = "lower"
    elif "passed-upper-chamber" in status:
        passed_in = "upper"
    else:
        passed_in = None

    if passed_in is not None:
        bills = bills.filter(
            actions__classification__contains=["passage"],
            actions__organization__classification=passed_in,
        )
    elif "signed" in status:
        bills = bills.filter(
            actions__classification__contains=["executive-signature"]
        )

    return bills.order_by("-billstatus__latest_action_date"), form
def resolve_bill(
    self,
    info,
    id=None,
    jurisdiction=None,
    session=None,
    identifier=None,
    openstatesUrl=None,
):
    """
    Resolve a single bill from one of three kinds of lookup.

    The lookups run in this order, a later one replacing an earlier result:
    ``jurisdiction`` + ``session`` + ``identifier``, then ``id``, then
    ``openstatesUrl`` (parsed as ``<abbr>/bills/<session>/<bill_id>``).

    Raises ValueError when the URL cannot be parsed or when no lookup
    produced a bill.
    """
    bill = None

    if jurisdiction and session and identifier:
        lookup = {
            "legislative_session__identifier": session,
            "identifier": identifier,
        }
        lookup.update(jurisdiction_query(jurisdiction))
        bill = Bill.objects.get(**lookup)

    if id:
        bill = Bill.objects.get(id=id)

    if openstatesUrl:
        # remove domain, start and end slashes
        path = urlparse(openstatesUrl).path.strip("/")
        # split the path into state abbr, session, and bill_id
        parts = re.match(
            r"(?P<abbr>\w+)/bills/(?P<session>.+)/(?P<bill_id>.+)", path
        )
        if not parts:
            raise ValueError(
                "Unable to parse openstatesUrl. openstatesUrl may be malformed."
            )

        jid = abbr_to_jid(parts["abbr"])
        url_identifier = fix_bill_id(parts["bill_id"])
        url_session = parts["session"]

        # query Bill with components
        # (this bit taken from def bill in views/bills.py)
        related = Bill.objects.select_related(
            "legislative_session",
            "legislative_session__jurisdiction",
            "from_organization",
        )
        bill = related.get(
            legislative_session__jurisdiction_id=jid,
            legislative_session__identifier=url_session,
            identifier=url_identifier,
        )

    if not bill:
        raise ValueError(
            "must either pass 'id', 'openstatesUrl', or 'jurisdiction', "
            "'session', and 'identifier' together"
        )
    return bill
def district_list(request, abbr, chamber=None):
    """
    Return the posts of a jurisdiction as a JSON list.

    Without ``chamber`` the posts of the upper, lower and legislature
    organizations are returned; with it, only that classification.
    """
    if chamber is None:
        by_chamber = {
            "organization__classification__in": ('upper', 'lower', 'legislature'),
        }
    else:
        by_chamber = {"organization__classification": chamber}

    posts = Post.objects.filter(
        organization__jurisdiction_id=abbr_to_jid(abbr), **by_chamber
    ).select_related('organization')

    return JsonResponse([convert_post(post) for post in posts], safe=False)
def legislator_list(request, geo=False):
    """
    Return the legislators matching the request's query string as JSON.

    Only memberships that have started (or have no start date) and have not
    ended (or have no end date) are considered. ``state``, ``chamber`` and
    ``district`` narrow the result. With ``geo`` true, ``lat`` and ``long``
    are required; a missing or non-numeric value yields HTTP 400.
    """
    get_param = request.GET.get
    abbr = get_param("state")
    chamber = get_param("chamber")
    district = get_param("district")

    today = datetime.date.today().isoformat()
    started = Q(memberships__start_date="") | Q(memberships__start_date__lte=today)
    not_ended = Q(memberships__end_date="") | Q(memberships__end_date__gte=today)
    filter_params = [started, not_ended]

    if geo:
        latitude = get_param("lat")
        longitude = get_param("long")
        if not (latitude and longitude):
            return JsonResponse(
                "Bad Request: must include lat & long", status=400, safe=False
            )
        try:
            latitude = float(latitude)
            longitude = float(longitude)
        except ValueError:
            return JsonResponse(
                "Bad Request: invalid lat, lon", status=400, safe=False
            )

        # the date is read again here, after the coordinates were validated
        today = datetime.date.today().isoformat()
        inside_boundary = Q(
            memberships__post__division__geometries__boundary__shape__contains=(
                Point(longitude, latitude)
            )
        )
        boundary_current = Q(
            memberships__post__division__geometries__boundary__set__end_date=None
        ) | Q(
            memberships__post__division__geometries__boundary__set__end_date__gt=today
        )
        filter_params.extend([inside_boundary, boundary_current])

    if abbr:
        filter_params.append(
            Q(memberships__organization__jurisdiction_id=abbr_to_jid(abbr))
        )
    if chamber:
        filter_params.append(Q(memberships__organization__classification=chamber))
    if district:
        filter_params.append(Q(memberships__post__label=district))

    people = person_qs().filter(*filter_params).distinct()
    payload = [convert_legislator(person) for person in people]
    return JsonResponse(payload, safe=False)
def people_list(request, state):
    """
    Render the list of people who currently hold a role in ``state``.

    People are ordered by family name, then name, and passed to the template
    through ``person_data`` under ``context["current_people"]``.
    """
    roster = Person.objects.filter(
        current_jurisdiction_id=abbr_to_jid(state),
        current_role__isnull=False,
    ).order_by("family_name", "name")

    context = {"current_people": [person_data(person) for person in roster]}

    return render(request, "people_admin/person_list.html", {"context": context})
def load_bills(state, session):
    """Return the bills of ``session`` in ``state`` with related rows prefetched."""
    prefetched = (
        "actions",
        "sponsorships",
        "votes",
        "votes__counts",
        "sources",
        "documents",
        "versions",
        "votes__votes",
    )
    session_bills = Bill.objects.filter(
        legislative_session__jurisdiction_id=abbr_to_jid(state),
        legislative_session__identifier=session,
    )
    return session_bills.prefetch_related(*prefetched)
def export_session_json(state, session):
    """
    Export every bill of one session to a zip archive under ``/tmp``.

    The archive holds a README plus the JSON written by ``export_json``.

    Returns the archive's filename when ``export_json`` reports success,
    otherwise ``None``. Raises ``LegislativeSession.DoesNotExist`` when the
    session is unknown.
    """
    sobj = LegislativeSession.objects.get(
        jurisdiction_id=abbr_to_jid(state), identifier=session
    )
    bills = [
        _bill_to_json(b)
        for b in Bill.objects.filter(legislative_session=sobj)
        .select_related(
            "legislative_session",
            "legislative_session__jurisdiction",
            "from_organization",
            "searchable",
        )
        .prefetch_related(
            "abstracts",
            "other_titles",
            "other_identifiers",
            "actions",
            "related_bills",
            "sponsorships",
            "documents",
            "documents__links",
            "versions",
            "versions__links",
            "sources",
            "votes",
            "votes__counts",
            "votes__votes",
        )
    ]

    # unique suffix so repeated exports do not overwrite each other
    suffix = _str_uuid()
    filename = f"/tmp/{state}_{session}_json_{suffix}.zip"

    # the context manager closes the archive (writing its central directory)
    # before the filename is handed back, instead of leaving that to
    # garbage collection
    with zipfile.ZipFile(filename, "w") as zf:
        ts = datetime.datetime.utcnow()
        zf.writestr(
            "README",
            f"""Open States Data Export State: {state} Session: {session} Generated At: {ts} JSON Format Version: 1.0 """,
        )
        exported = export_json(
            f"{state}/{session}/{state}_{session}_bills.json", bills, zf
        )

    if exported:
        return filename
    return None
def handle(self, *args, **options):
    """
    Export and publish the requested sessions of one state.

    With the ``all`` option every session of the state is exported. When no
    sessions are requested the available identifiers are printed instead.
    Requested sessions that the state does not have are ignored.
    """
    state = options["state"]
    known = [
        s.identifier
        for s in LegislativeSession.objects.filter(
            jurisdiction_id=abbr_to_jid(state)
        )
    ]

    if options["all"]:
        options["sessions"] = known
    requested = options["sessions"]

    if not requested:
        print("available sessions:")
        for identifier in known:
            print(" ", identifier)
        return

    for identifier in requested:
        if identifier not in known:
            continue
        filename = export_session(state, identifier)
        # a falsy filename means there is nothing to upload
        if filename:
            upload_and_publish(state, identifier, filename)
def upload_and_publish(state, session, filename):
    """
    Upload an export file to S3 and record its public URL for the session.

    The object is stored under ``csv/latest/`` using only the base name of
    ``filename``, so a local path such as ``/tmp/x.zip`` does not leak its
    directories into the S3 key or the published URL. The session's
    DataExport row is created or updated with the resulting URL.

    Raises ``LegislativeSession.DoesNotExist`` when the session is unknown.
    """
    import os

    sobj = LegislativeSession.objects.get(
        jurisdiction_id=abbr_to_jid(state), identifier=session
    )

    s3 = boto3.client("s3")

    BULK_S3_BUCKET = "data.openstates.org"
    BULK_S3_PATH = "csv/latest/"
    s3_key = BULK_S3_PATH + os.path.basename(filename)
    s3_url = f"https://{BULK_S3_BUCKET}/{s3_key}"

    s3.upload_file(
        filename,
        BULK_S3_BUCKET,
        s3_key,
        ExtraArgs={"ACL": "public-read"},
    )
    print("uploaded", s3_url)

    DataExport.objects.update_or_create(session=sobj, defaults=dict(url=s3_url))
def dq_overview_session(request, state, session):
    """Render the data quality page of ``state`` for one specific session."""
    jid = abbr_to_jid(state)
    all_sessions = sessions_with_bills(jid)
    chosen = LegislativeSession.objects.get(identifier=session, jurisdiction_id=jid)
    reports = DataQualityReport.objects.filter(session=chosen)

    return render(
        request,
        "dashboards/dqr_page.html",
        {
            "state": state,
            "chambers": get_chambers_from_abbr(state),
            "session": chosen,
            "all_sessions": all_sessions,
            "dashboards": reports,
        },
    )
def dq_overview(request, state):
    """
    Render the data quality page of ``state`` for its first listed session.

    When ``sessions_with_bills`` returns nothing, the page gets a placeholder
    string as the session and an empty list of dashboards.
    """
    all_sessions = sessions_with_bills(abbr_to_jid(state))

    if all_sessions:
        shown_session = all_sessions[0]
        reports = DataQualityReport.objects.filter(session=shown_session)
    else:
        shown_session = "Dashboards Not Generated Yet"
        reports = []

    return render(
        request,
        "dashboards/dqr_page.html",
        {
            "state": state,
            "chambers": get_chambers_from_abbr(state),
            "session": shown_session,
            "all_sessions": all_sessions,
            "dashboards": reports,
        },
    )
def upload_and_publish(state, session, filename, data_type):
    """
    Upload an export file to S3 and record its public URL.

    The object key is ``<data_type>/latest/<base name of filename>``. The
    DataExport row for this session and data type is created or updated
    with the resulting URL.
    """
    sobj = LegislativeSession.objects.get(
        jurisdiction_id=abbr_to_jid(state), identifier=session
    )

    bucket = "data.openstates.org"
    key = f"{data_type}/latest/" + os.path.basename(filename)
    s3_url = f"https://{bucket}/{key}"

    client = boto3.client("s3")
    client.upload_file(filename, bucket, key, ExtraArgs={"ACL": "public-read"})
    print("uploaded", s3_url)

    DataExport.objects.update_or_create(
        session=sobj,
        data_type=data_type,
        defaults={"url": s3_url},
    )
def jurisdiction_list(request):
    """
    Render per-jurisdiction people statistics for every state plus the US.

    For each jurisdiction the page receives the number of unmatched names
    (status "U") and how many current people lack a photo, any phone number,
    or any address. The three "missing" counts are independent: a person
    lacking several things is counted once in each category.
    """
    state_people_data = {}

    # jurisdiction name -> number of unmatched names
    unmatched_by_state = dict(
        UnmatchedName.objects.filter(status="U")
        .values_list("session__jurisdiction__name")
        .annotate(number=Count("id"))
    )

    for state in states + [us.unitedstatesofamerica]:
        jid = abbr_to_jid(state.abbr)
        current_people = [
            person_data(p)
            for p in Person.objects.filter(
                current_jurisdiction_id=jid, current_role__isnull=False
            ).prefetch_related("offices")
        ]

        photoless = 0
        phoneless = 0
        addressless = 0
        for person in current_people:
            # separate `if`s rather than an elif chain, so a missing photo
            # does not hide a missing phone or address
            if person.get("image", "") == "":
                photoless += 1
            if "capitol_voice" not in person and "district_voice" not in person:
                phoneless += 1
            if "capitol_address" not in person and "district_address" not in person:
                addressless += 1

        jurisdiction = "United States" if state.abbr == "US" else state.name

        state_people_data[state.abbr.lower()] = {
            "state": jurisdiction,
            "unmatched": unmatched_by_state.get(state.name, 0),
            "missing_photo": photoless,
            "missing_phone": phoneless,
            "missing_address": addressless,
        }

    return render(
        request,
        "people_admin/jurisdiction_list.html",
        {"state_people_data": state_people_data},
    )
def bill_detail(
    request, abbr=None, session=None, bill_id=None, chamber=None, billy_bill_id=None
):
    """
    Return one bill (votes included) as JSON, or a 404 when nothing matches.

    With ``abbr`` the bill is found by jurisdiction, session identifier and
    ``bill_id``; otherwise by the legacy id ``billy_bill_id``. ``chamber``
    optionally restricts the originating organization's classification.
    """
    # NOTE(review): `filters` stays unbound when neither abbr nor
    # billy_bill_id is passed, so such a call raises below; presumably the
    # URL configuration rules that out.
    if abbr:
        filters = {
            "legislative_session__jurisdiction_id": abbr_to_jid(abbr),
            "legislative_session__identifier": session,
            "identifier": bill_id,
        }
    elif billy_bill_id:
        filters = {"legacy_mapping__legacy_id": billy_bill_id}

    if chamber:
        # ne and dc: "upper" is looked up as "legislature"
        single_chamber = abbr in ("ne", "dc")
        filters["from_organization__classification"] = (
            "legislature" if single_chamber and chamber == "upper" else chamber
        )

    queryset = bill_qs(include_votes=True)
    match = get_object_or_404(queryset, **filters)
    return JsonResponse(convert_bill(match, include_votes=True))
def search_people(query, *, state=None, current=True):
    """
    Return people whose name contains ``query`` (case-insensitive).

    With ``current`` true the search is limited by ``current_role_filters()``
    and to memberships in upper, lower or legislature organizations. ``state``
    optionally limits the search to one jurisdiction. Memberships with their
    organization and post are prefetched.
    """
    conditions = []
    criteria = {}
    if current:
        conditions = current_role_filters()
        criteria["memberships__organization__classification__in"] = [
            "upper",
            "lower",
            "legislature",
        ]
    criteria["name__icontains"] = query

    people = PersonProxy.objects.filter(*conditions, **criteria)

    if state:
        people = people.filter(
            memberships__organization__jurisdiction_id=abbr_to_jid(state)
        )

    return people.prefetch_related(
        "memberships", "memberships__organization", "memberships__post"
    )