def build_single_notice(self, notice_json, checkpoint=True):
    """Build the notice described by one Federal Register JSON record.

    Logs the document number and XML url being processed, then calls
    build_notice with this object's CFR title and part.

    :param notice_json: dict with at least 'document_number' and
        'full_text_xml_url' keys.
    :param checkpoint: when true, the build is routed through
        self.checkpointer.checkpoint under the key
        'notice-<document_number>' (presumably so the result can be
        reused -- confirm against the checkpointer); when false,
        build_notice is called directly.
    :returns: whatever the selected path returns.
    """
    doc_number = notice_json['document_number']
    logging.info('building notice {0} from {1}'.format(
        doc_number, notice_json['full_text_xml_url']))

    # Direct path: skip the checkpointer entirely.
    if not checkpoint:
        return build_notice(self.cfr_title, self.cfr_part, notice_json)

    # The build is wrapped in a callable so the checkpointer decides
    # whether and when to run it.
    return self.checkpointer.checkpoint(
        'notice-' + doc_number,
        lambda: build_notice(self.cfr_title, self.cfr_part, notice_json))
def build_single_notice(self, notice_json, checkpoint=True):
    """Log and build a single notice from its Federal Register JSON.

    :param notice_json: dict providing 'document_number' and
        'full_text_xml_url'.
    :param checkpoint: if true, hand the build to
        self.checkpointer.checkpoint keyed by 'notice-' plus the document
        number; if false, run build_notice immediately.
    :returns: the value produced by the checkpointer or by build_notice.
    """
    def make_notice():
        # Deferred build shared by both branches below.
        return build_notice(self.cfr_title, self.cfr_part, notice_json)

    message = 'building notice {0} from {1}'.format(
        notice_json['document_number'], notice_json['full_text_xml_url'])
    logging.info(message)

    if checkpoint:
        checkpoint_key = 'notice-' + notice_json['document_number']
        return self.checkpointer.checkpoint(checkpoint_key, make_notice)
    return make_notice()
def build_single_notice(self, notice_json, checkpoint=True):
    """Build one notice, optionally going through the checkpointer.

    The document number and full-text XML url from ``notice_json`` are
    logged first. With ``checkpoint`` true the build is passed (as a
    callable) to ``self.checkpointer.checkpoint`` under the key
    ``"notice-" + document_number``; otherwise ``build_notice`` is invoked
    directly with this object's CFR title and part.
    """
    number = notice_json["document_number"]
    xml_url = notice_json["full_text_xml_url"]
    logging.info("building notice {0} from {1}".format(number, xml_url))

    return (
        self.checkpointer.checkpoint(
            "notice-" + number,
            lambda: build_notice(self.cfr_title, self.cfr_part, notice_json),
        )
        if checkpoint
        else build_notice(self.cfr_title, self.cfr_part, notice_json)
    )
def test_build_notice_override_fr(self):
    """
    Test that the FR_NOTICE_OVERRIDES setting can override the 'dates'
    value from build_notice.

    The override is installed in the shared settings mapping only for the
    duration of the assertion and is removed (or the previous entry
    restored) afterwards, so it cannot leak into other tests that use the
    same document number.
    """
    fr = {
        'abstract': 'sum sum sum',
        'action': 'actact',
        'agency_names': ['Agency 1', 'Agency 2'],
        'cfr_references': [{'title': 12, 'part': 9191},
                           {'title': 12, 'part': 9292}],
        'citation': 'citation citation',
        'comments_close_on': None,
        'dates': 'date info',
        'document_number': '7878-111',
        'effective_on': '1956-09-09',
        'end_page': 9999,
        'full_text_xml_url': None,
        'html_url': 'some url',
        'publication_date': '1955-12-10',
        'regulation_id_numbers': ['a231a-232q'],
        'start_page': 8888,
        'type': 'Rule',
        'volume': 66,
    }

    overrides = build.settings.FR_NOTICE_OVERRIDES
    # Sentinel distinguishes "no previous entry" from a previous falsy one.
    missing = object()
    previous = overrides.get('7878-111', missing)

    # Set our override value
    overrides['7878-111'] = {
        'dates': 'new date info',
    }
    try:
        self.assertEqual(build.build_notice('5', '9292', fr), [{
            'abstract': 'sum sum sum',
            'action': 'actact',
            'agency_names': ['Agency 1', 'Agency 2'],
            'cfr_parts': ['9191', '9292'],
            'cfr_part': '9292',
            'cfr_title': '5',
            'document_number': '7878-111',
            'effective_on': '1956-09-09',
            'fr_citation': 'citation citation',
            'fr_url': 'some url',
            'fr_volume': 66,
            'initial_effective_on': '1956-09-09',
            'meta': {
                'dates': 'new date info',
                'end_page': 9999,
                'start_page': 8888,
                'type': 'Rule'
            },
            'publication_date': '1955-12-10',
            'regulation_id_numbers': ['a231a-232q'],
        }])
    finally:
        # Put the shared settings back exactly as they were, even when the
        # assertion fails.
        if previous is missing:
            overrides.pop('7878-111', None)
        else:
            overrides['7878-111'] = previous
def test_build_notice(self):
    """build_notice should return exactly one notice with the expected
    fields for a single Federal Register record."""
    fr_json = {
        'abstract': 'sum sum sum',
        'action': 'actact',
        'agency_names': ['Agency 1', 'Agency 2'],
        'cfr_references': [{'title': 12, 'part': 9191},
                           {'title': 12, 'part': 9292}],
        'citation': 'citation citation',
        'comments_close_on': None,
        'dates': 'date info',
        'document_number': '7878-111',
        'effective_on': '1956-09-09',
        'end_page': 9999,
        'full_text_xml_url': None,
        'html_url': 'some url',
        'publication_date': '1955-12-10',
        'regulation_id_numbers': ['a231a-232q'],
        'start_page': 8888,
        'type': 'Rule',
        'volume': 66,
    }
    expected = {
        'abstract': 'sum sum sum',
        'action': 'actact',
        'agency_names': ['Agency 1', 'Agency 2'],
        'cfr_parts': ['9191', '9292'],
        'cfr_title': '5',
        'document_number': '7878-111',
        'effective_on': '1956-09-09',
        'fr_citation': 'citation citation',
        'fr_url': 'some url',
        'fr_volume': 66,
        'initial_effective_on': '1956-09-09',
        'meta': {
            'dates': 'date info',
            'end_page': 9999,
            'start_page': 8888,
            'type': 'Rule'
        },
        'publication_date': '1955-12-10',
        'regulation_id_numbers': ['a231a-232q'],
    }

    built = build.build_notice('5', '9292', fr_json)
    self.assertEqual(1, len(built))

    notice = built[0]
    # Sort the list-valued fields so the comparison does not depend on
    # the order in which they were produced.
    notice['agency_names'] = sorted(notice['agency_names'])
    notice['cfr_parts'] = sorted(notice['cfr_parts'])
    self.assertEqual(notice, expected)
def fetch_notices(cfr_title, cfr_part):
    """Search through all articles associated with this part and build a
    notice from each result.

    A single request is made for up to 1000 articles, oldest first;
    paging could lift that limit in the future.

    :param cfr_title: CFR title used as a search condition and passed on
        to build_notice.
    :param cfr_part: CFR part used as a search condition and passed on to
        build_notice.
    :returns: list with one build_notice result per article, in the order
        the API returned them.
    """
    wanted_fields = [
        "abstract", "action", "agency_names", "citation",
        "comments_close_on", "dates", "document_number", "effective_on",
        "end_page", "full_text_xml_url", "html_url", "publication_date",
        "regulation_id_numbers", "start_page", "type", "volume",
    ]
    query = {
        "conditions[cfr][title]": cfr_title,
        "conditions[cfr][part]": cfr_part,
        "per_page": 1000,
        "order": "oldest",
        "fields[]": wanted_fields,
    }
    response = requests.get(API_BASE + "articles", params=query)
    payload = response.json()
    return [build_notice(cfr_title, cfr_part, article)
            for article in payload['results']]
def test_build_notice(self):
    """build_notice should map a Federal Register record onto a
    one-element list holding the expected notice dict."""
    fr_json = {
        'abstract': 'sum sum sum',
        'action': 'actact',
        'agency_names': ['Agency 1', 'Agency 2'],
        'cfr_references': [{'title': 12, 'part': 9191},
                           {'title': 12, 'part': 9292}],
        'citation': 'citation citation',
        'comments_close_on': None,
        'dates': 'date info',
        'document_number': '7878-111',
        'effective_on': '1956-09-09',
        'end_page': 9999,
        'full_text_xml_url': None,
        'html_url': 'some url',
        'publication_date': '1955-12-10',
        'regulation_id_numbers': ['a231a-232q'],
        'start_page': 8888,
        'type': 'Rule',
        'volume': 66,
    }
    expected_meta = {
        'dates': 'date info',
        'end_page': 9999,
        'start_page': 8888,
        'type': 'Rule'
    }
    expected_notice = {
        'abstract': 'sum sum sum',
        'action': 'actact',
        'agency_names': ['Agency 1', 'Agency 2'],
        'cfr_parts': ['9191', '9292'],
        'cfr_title': '5',
        'document_number': '7878-111',
        'effective_on': '1956-09-09',
        'fr_citation': 'citation citation',
        'fr_url': 'some url',
        'fr_volume': 66,
        'initial_effective_on': '1956-09-09',
        'meta': expected_meta,
        'publication_date': '1955-12-10',
        'regulation_id_numbers': ['a231a-232q'],
    }

    built = build.build_notice('5', '9292', fr_json)
    self.assertEqual(built, [expected_notice])
def test_build_notice(self):
    """A single Federal Register record should come back from
    build_notice as a list containing one fully populated notice."""
    source_record = {
        "abstract": "sum sum sum",
        "action": "actact",
        "agency_names": ["Agency 1", "Agency 2"],
        "cfr_references": [{"title": 12, "part": 9191}, {"title": 12, "part": 9292}],
        "citation": "citation citation",
        "comments_close_on": None,
        "dates": "date info",
        "document_number": "7878-111",
        "effective_on": "1956-09-09",
        "end_page": 9999,
        "full_text_xml_url": None,
        "html_url": "some url",
        "publication_date": "1955-12-10",
        "regulation_id_numbers": ["a231a-232q"],
        "start_page": 8888,
        "type": "Rule",
        "volume": 66,
    }
    wanted = [
        {
            "abstract": "sum sum sum",
            "action": "actact",
            "agency_names": ["Agency 1", "Agency 2"],
            "cfr_parts": ["9191", "9292"],
            "cfr_title": "5",
            "document_number": "7878-111",
            "effective_on": "1956-09-09",
            "fr_citation": "citation citation",
            "fr_url": "some url",
            "fr_volume": 66,
            "initial_effective_on": "1956-09-09",
            "meta": {
                "dates": "date info",
                "end_page": 9999,
                "start_page": 8888,
                "type": "Rule",
            },
            "publication_date": "1955-12-10",
            "regulation_id_numbers": ["a231a-232q"],
        }
    ]

    produced = build.build_notice("5", "9292", source_record)
    self.assertEqual(produced, wanted)
def first_notice_and_xml(title, part):
    """Find the first annual xml and its associated notice.

    Final notices that have both a full-text XML url and an effective date
    are built (without XML processing), passed through
    modify_effective_dates, and sorted by (effective_on, publication_date).
    Each notice is then filed under the year given by annual_edition_for;
    when several notices share a year, the one latest in that sort order
    is kept. Years are tried in ascending order and the first one whose
    volume contains the part wins.

    :param title: CFR title.
    :param part: CFR part.
    :returns: a ``(notice, part_xml)`` tuple, or None when no volume with
        XML for the part is found.
    """
    notices = [build_notice(title, part, n, do_process_xml=False)
               for n in fetch_notice_json(title, part, only_final=True)
               if n['full_text_xml_url'] and n['effective_on']]
    modify_effective_dates(notices)

    notices = sorted(notices,
                     key=lambda n: (n['effective_on'], n['publication_date']))

    # Later notices overwrite earlier ones filed under the same year.
    years = {}
    for n in notices:
        years[annual_edition_for(title, n)] = n

    # Iterate the sorted keys rather than dict.iteritems(), which only
    # exists on Python 2; this form behaves the same on Python 2 and 3.
    for year in sorted(years):
        volume = find_volume(year, title, part)
        if volume:
            part_xml = volume.find_part_xml(part)
            if part_xml is not None:
                return (years[year], part_xml)
    return None
def test_build_notice(self):
    """build_notice should produce one notice carrying the requested
    'cfr_part' when the record has no 'cfr_references' entry."""
    fr_json = {
        'abstract': 'sum sum sum',
        'action': 'actact',
        'agency_names': ['Agency 1', 'Agency 2'],
        'citation': 'citation citation',
        'comments_close_on': None,
        'dates': 'date info',
        'document_number': '7878-111',
        'effective_on': '1956-09-09',
        'end_page': 9999,
        'full_text_xml_url': None,
        'html_url': 'some url',
        'publication_date': '1955-12-10',
        'regulation_id_numbers': ['a231a-232q'],
        'start_page': 8888,
        'type': 'Rule',
        'volume': 66,
    }
    expected_notice = {
        'abstract': 'sum sum sum',
        'action': 'actact',
        'agency_names': ['Agency 1', 'Agency 2'],
        'cfr_part': '9292',
        'cfr_title': '5',
        'document_number': '7878-111',
        'effective_on': '1956-09-09',
        'fr_citation': 'citation citation',
        'fr_url': 'some url',
        'fr_volume': 66,
        'initial_effective_on': '1956-09-09',
        'meta': {
            'dates': 'date info',
            'end_page': 9999,
            'start_page': 8888,
            'type': 'Rule'
        },
        'publication_date': '1955-12-10',
        'regulation_id_numbers': ['a231a-232q'],
    }

    outcome = build.build_notice('5', '9292', fr_json)
    self.assertEqual(outcome, [expected_notice])
def test_build_notice(self):
    """build_notice should return one notice with the expected fields for
    a record that has no abstract, action or agency names."""
    fr_json = {
        'cfr_references': [{'title': 12, 'part': 9191},
                           {'title': 12, 'part': 9292}],
        'citation': 'citation citation',
        'comments_close_on': None,
        'dates': 'date info',
        'document_number': '7878-111',
        'effective_on': '1956-09-09',
        'end_page': 9999,
        'full_text_xml_url': None,
        'html_url': 'some url',
        'publication_date': '1955-12-10',
        'regulation_id_numbers': ['a231a-232q'],
        'start_page': 8888,
        'type': 'Rule',
        'volume': 66,
    }
    expected = {
        'cfr_parts': ['9191', '9292'],
        'cfr_title': '5',
        'document_number': '7878-111',
        'effective_on': '1956-09-09',
        'fr_citation': 'citation citation',
        'fr_url': 'some url',
        'fr_volume': 66,
        'initial_effective_on': '1956-09-09',
        'meta': {
            'dates': 'date info',
            'end_page': 9999,
            'start_page': 8888,
            'type': 'Rule'
        },
        'publication_date': '1955-12-10',
        'regulation_id_numbers': ['a231a-232q'],
    }

    built = build.build_notice('5', '9292', fr_json)
    self.assertEqual(1, len(built))

    notice = built[0]
    # Sort the parts so the comparison does not depend on their order.
    notice['cfr_parts'] = sorted(notice['cfr_parts'])
    self.assertEqual(notice, expected)
def fetch_sxs(document_number):
    """Fetch and parse Section-by-Section analyses.

    DOCUMENT_NUMBER is the identifier associated with a final rule. If a
    rule has been split, use the split identifiers, a.k.a. version ids.

    The stored notice XML for the document is read, a notice is built from
    it together with freshly fetched metadata, and the first built notice
    is written to the SxS entry.
    """
    sxs_out = entry.SxS(document_number)
    notice_in = entry.Notice(document_number)

    graph = dependency.Graph()
    graph.add(sxs_out, notice_in)
    graph.validate_for(sxs_out)

    # Staleness is deliberately not checked: when a specific file is
    # requested we always want to process it.
    # @todo - break apart processing of SxS. We don't need all of the
    # other fields
    stored_xml = notice_in.read()
    meta = meta_data(document_number, FULL_NOTICE_FIELDS)
    first_title = stored_xml.cfr_titles[0]
    built = build_notice(first_title, None, meta,
                         xml_to_process=stored_xml.xml)
    sxs_out.write(built[0])
def fetch_sxs(document_number):
    """Fetch and parse Section-by-Section analyses.

    DOCUMENT_NUMBER is the identifier associated with a final rule. If a
    rule has been split, use the split identifiers, a.k.a. version ids.

    The stored notice XML is read, a notice is built using the title of
    its first CFR reference plus freshly fetched metadata, and the first
    built notice is written to the SxS entry.
    """
    sxs_out = entry.SxS(document_number)
    notice_in = entry.Notice(document_number)

    graph = dependency.Graph()
    graph.add(sxs_out, notice_in)
    graph.validate_for(sxs_out)

    # Staleness is deliberately not checked: when a specific file is
    # requested we always want to process it.
    # @todo - break apart processing of SxS. We don't need all of the
    # other fields
    stored_xml = notice_in.read()
    meta = meta_data(document_number, FULL_NOTICE_FIELDS)
    first_title = stored_xml.cfr_refs[0].title
    built = build_notice(first_title, None, meta,
                         xml_to_process=stored_xml.xml)
    sxs_out.write(built[0])
def first_notice_and_xml(title, part):
    """Find the first annual xml and its associated notice.

    Only final notices with both a full-text XML url and an effective date
    are considered. They are built without XML processing, adjusted by
    modify_effective_dates, sorted by (effective_on, publication_date) and
    grouped by the year returned from annual_edition_for, keeping the last
    notice in that order for each year. Years are then checked in
    ascending order until one has a volume containing XML for the part.

    :param title: CFR title.
    :param part: CFR part.
    :returns: a ``(notice, part_xml)`` tuple, or None if no year yields
        XML for the part.
    """
    notices = [
        build_notice(title, part, n, do_process_xml=False)
        for n in fetch_notice_json(title, part, only_final=True)
        if n['full_text_xml_url'] and n['effective_on']
    ]
    modify_effective_dates(notices)

    notices = sorted(
        notices, key=lambda n: (n['effective_on'], n['publication_date']))

    # One notice per year; a later notice replaces an earlier one that
    # maps to the same year.
    years = {}
    for n in notices:
        years[annual_edition_for(title, n)] = n

    # dict.iteritems() is Python 2 only; walking the sorted keys gives the
    # same ascending-year order on both Python 2 and Python 3.
    for year in sorted(years):
        volume = find_volume(year, title, part)
        if volume:
            part_xml = volume.find_part_xml(part)
            if part_xml is not None:
                return (years[year], part_xml)
    return None
def fetch_notices(cfr_title, cfr_part, only_final=False):
    """Search and then convert to notice objects (including parsing).

    Every record returned by fetch_notice_json is passed to build_notice,
    and the notices from each call are concatenated, in order, into one
    flat list.
    """
    return [
        notice
        for record in fetch_notice_json(cfr_title, cfr_part, only_final)
        for notice in build_notice(cfr_title, cfr_part, record)
    ]
def test_build_notice_override_fr(self):
    """
    Test that the FR_NOTICE_OVERRIDES setting can override the 'dates'
    value from build_notice.

    The override entry is added to the shared settings mapping just for
    this test and is always undone afterwards (restoring any entry that
    was there before), so other tests using the same document number see
    unmodified settings.
    """
    fr = {
        'abstract': 'sum sum sum',
        'action': 'actact',
        'agency_names': ['Agency 1', 'Agency 2'],
        'cfr_references': [{
            'title': 12,
            'part': 9191
        }, {
            'title': 12,
            'part': 9292
        }],
        'citation': 'citation citation',
        'comments_close_on': None,
        'dates': 'date info',
        'document_number': '7878-111',
        'effective_on': '1956-09-09',
        'end_page': 9999,
        'full_text_xml_url': None,
        'html_url': 'some url',
        'publication_date': '1955-12-10',
        'regulation_id_numbers': ['a231a-232q'],
        'start_page': 8888,
        'type': 'Rule',
        'volume': 66,
    }

    overrides = build.settings.FR_NOTICE_OVERRIDES
    # Unique marker so an existing falsy entry is still restored.
    not_set = object()
    saved = overrides.get('7878-111', not_set)

    # Set our override value
    overrides['7878-111'] = {
        'dates': 'new date info',
    }
    try:
        self.assertEqual(build.build_notice('5', '9292', fr), [{
            'abstract': 'sum sum sum',
            'action': 'actact',
            'agency_names': ['Agency 1', 'Agency 2'],
            'cfr_parts': ['9191', '9292'],
            'cfr_part': '9292',
            'cfr_title': '5',
            'document_number': '7878-111',
            'effective_on': '1956-09-09',
            'fr_citation': 'citation citation',
            'fr_url': 'some url',
            'fr_volume': 66,
            'initial_effective_on': '1956-09-09',
            'meta': {
                'dates': 'new date info',
                'end_page': 9999,
                'start_page': 8888,
                'type': 'Rule'
            },
            'publication_date': '1955-12-10',
            'regulation_id_numbers': ['a231a-232q'],
        }])
    finally:
        # Undo the settings change whether or not the assertion passed.
        if saved is not_set:
            overrides.pop('7878-111', None)
        else:
            overrides['7878-111'] = saved