示例#1
0
    def build_single_notice(self, notice_json, checkpoint=True):
        """Build the notice data for one Federal Register entry.

        :param notice_json: dict describing the notice; its
            ``document_number`` and ``full_text_xml_url`` keys are read here
            and the whole dict is handed to ``build_notice``.
        :param checkpoint: when true, the build is routed through
            ``self.checkpointer.checkpoint`` under the key
            ``'notice-<document_number>'``; otherwise ``build_notice`` is
            called directly.
        :returns: the value produced by the checkpointer or by
            ``build_notice``.
        """
        document_number = notice_json['document_number']
        # Pass the values as logging arguments rather than pre-formatting, so
        # the message is only rendered when an INFO record is really emitted.
        logging.info('building notice %s from %s',
                     document_number, notice_json['full_text_xml_url'])

        def build():
            return build_notice(self.cfr_title, self.cfr_part, notice_json)

        if checkpoint:
            return self.checkpointer.checkpoint(
                'notice-' + document_number, build)
        return build()
示例#2
0
    def build_single_notice(self, notice_json, checkpoint=True):
        """Build the notice data for one Federal Register entry.

        ``notice_json`` must carry ``document_number`` and
        ``full_text_xml_url``.  With ``checkpoint`` true the work is handed to
        ``self.checkpointer.checkpoint`` keyed by
        ``'notice-<document_number>'``; with it false ``build_notice`` runs
        directly.  The result of whichever path ran is returned.
        """
        # Lazy %-style arguments: logging formats the message only if the
        # record is actually handled.
        logging.info('building notice %s from %s',
                     notice_json['document_number'],
                     notice_json['full_text_xml_url'])
        if not checkpoint:
            return build_notice(self.cfr_title, self.cfr_part, notice_json)

        return self.checkpointer.checkpoint(
            'notice-' + notice_json['document_number'],
            lambda: build_notice(self.cfr_title, self.cfr_part, notice_json))
示例#3
0
    def build_single_notice(self, notice_json, checkpoint=True):
        """Build the notice data for one Federal Register entry.

        Reads ``document_number`` and ``full_text_xml_url`` from
        ``notice_json`` for logging, then calls ``build_notice`` with this
        object's ``cfr_title``/``cfr_part``.  When ``checkpoint`` is true the
        call is wrapped by ``self.checkpointer.checkpoint`` under the key
        ``"notice-<document_number>"``.  Returns the resulting notice value.
        """
        doc_number = notice_json["document_number"]
        xml_url = notice_json["full_text_xml_url"]
        # Let logging do the interpolation so nothing is formatted when INFO
        # output is disabled.
        logging.info("building notice %s from %s", doc_number, xml_url)

        if checkpoint:
            notice = self.checkpointer.checkpoint(
                "notice-" + doc_number,
                lambda: build_notice(self.cfr_title, self.cfr_part, notice_json),
            )
        else:
            notice = build_notice(self.cfr_title, self.cfr_part, notice_json)
        return notice
    def test_build_notice_override_fr(self):
        """ Test that the FR_NOTICE_OVERRIDES setting can override the
        'dates' value from build_notice """
        fr = {
            'abstract': 'sum sum sum',
            'action': 'actact',
            'agency_names': ['Agency 1', 'Agency 2'],
            'cfr_references': [{'title': 12, 'part': 9191},
                               {'title': 12, 'part': 9292}],
            'citation': 'citation citation',
            'comments_close_on': None,
            'dates': 'date info',
            'document_number': '7878-111',
            'effective_on': '1956-09-09',
            'end_page': 9999,
            'full_text_xml_url': None,
            'html_url': 'some url',
            'publication_date': '1955-12-10',
            'regulation_id_numbers': ['a231a-232q'],
            'start_page': 8888,
            'type': 'Rule',
            'volume': 66,
        }

        # Set our override value. FR_NOTICE_OVERRIDES lives on the settings
        # module and is therefore shared by every test; remember what was
        # there so the entry can be put back afterwards instead of leaking
        # into tests that build document '7878-111' without an override.
        # NOTE(review): assumes FR_NOTICE_OVERRIDES is a plain dict - confirm.
        overrides = build.settings.FR_NOTICE_OVERRIDES
        had_previous = '7878-111' in overrides
        previous = overrides.get('7878-111')
        overrides['7878-111'] = {
            'dates': 'new date info',
        }

        try:
            self.assertEqual(build.build_notice('5', '9292', fr), [{
                'abstract': 'sum sum sum',
                'action': 'actact',
                'agency_names': ['Agency 1', 'Agency 2'],
                'cfr_parts': ['9191', '9292'],
                'cfr_part': '9292',
                'cfr_title': '5',
                'document_number': '7878-111',
                'effective_on': '1956-09-09',
                'fr_citation': 'citation citation',
                'fr_url': 'some url',
                'fr_volume': 66,
                'initial_effective_on': '1956-09-09',
                'meta': {
                    'dates': 'new date info',
                    'end_page': 9999,
                    'start_page': 8888,
                    'type': 'Rule'
                },
                'publication_date': '1955-12-10',
                'regulation_id_numbers': ['a231a-232q'],
            }])
        finally:
            # Restore the shared setting whether or not the assertion passed.
            if had_previous:
                overrides['7878-111'] = previous
            else:
                overrides.pop('7878-111', None)
 def test_build_notice(self):
     """Build a notice from a sample FR record and compare it field by
     field, ignoring the order of the list-valued fields."""
     expected = {
         'abstract': 'sum sum sum',
         'action': 'actact',
         'agency_names': ['Agency 1', 'Agency 2'],
         'cfr_parts': ['9191', '9292'],
         'cfr_title': '5',
         'document_number': '7878-111',
         'effective_on': '1956-09-09',
         'fr_citation': 'citation citation',
         'fr_url': 'some url',
         'fr_volume': 66,
         'initial_effective_on': '1956-09-09',
         'meta': {
             'dates': 'date info',
             'end_page': 9999,
             'start_page': 8888,
             'type': 'Rule'
         },
         'publication_date': '1955-12-10',
         'regulation_id_numbers': ['a231a-232q'],
     }
     cfr_references = [{'title': 12, 'part': 9191},
                       {'title': 12, 'part': 9292}]
     fr = {
         'abstract': 'sum sum sum',
         'action': 'actact',
         'agency_names': ['Agency 1', 'Agency 2'],
         'cfr_references': cfr_references,
         'citation': 'citation citation',
         'comments_close_on': None,
         'dates': 'date info',
         'document_number': '7878-111',
         'effective_on': '1956-09-09',
         'end_page': 9999,
         'full_text_xml_url': None,
         'html_url': 'some url',
         'publication_date': '1955-12-10',
         'regulation_id_numbers': ['a231a-232q'],
         'start_page': 8888,
         'type': 'Rule',
         'volume': 66,
     }

     built = build.build_notice('5', '9292', fr)
     self.assertEqual(1, len(built))

     only_notice = built[0]
     # Normalise list ordering before comparing against the sorted literals.
     for list_field in ('agency_names', 'cfr_parts'):
         only_notice[list_field] = sorted(only_notice[list_field])
     self.assertEqual(only_notice, expected)
def fetch_notices(cfr_title, cfr_part):
    """Query the articles endpoint for this title/part and build each hit.

    One request is made with ``per_page`` set to 1000, so results beyond the
    first 1000 are not retrieved; paging could lift that limit in the future.
    Returns a list holding one ``build_notice`` result per API result."""
    wanted_fields = [
        "abstract", "action", "agency_names", "citation",
        "comments_close_on", "dates", "document_number", "effective_on",
        "end_page", "full_text_xml_url", "html_url", "publication_date",
        "regulation_id_numbers", "start_page", "type", "volume"
    ]
    query = {
        "conditions[cfr][title]": cfr_title,
        "conditions[cfr][part]": cfr_part,
        "per_page": 1000,
        "order": "oldest",
        "fields[]": wanted_fields,
    }
    response = requests.get(API_BASE + "articles", params=query)
    payload = response.json()

    return [build_notice(cfr_title, cfr_part, article)
            for article in payload['results']]
示例#7
0
 def test_build_notice(self):
     """Build a notice from a sample FR record and check the exact list
     that build_notice returns."""
     expected_meta = {
         'dates': 'date info',
         'end_page': 9999,
         'start_page': 8888,
         'type': 'Rule'
     }
     expected_notice = {
         'abstract': 'sum sum sum',
         'action': 'actact',
         'agency_names': ['Agency 1', 'Agency 2'],
         'cfr_parts': ['9191', '9292'],
         'cfr_title': '5',
         'document_number': '7878-111',
         'effective_on': '1956-09-09',
         'fr_citation': 'citation citation',
         'fr_url': 'some url',
         'fr_volume': 66,
         'initial_effective_on': '1956-09-09',
         'meta': expected_meta,
         'publication_date': '1955-12-10',
         'regulation_id_numbers': ['a231a-232q'],
     }
     fr = {
         'abstract': 'sum sum sum',
         'action': 'actact',
         'agency_names': ['Agency 1', 'Agency 2'],
         'cfr_references': [{'title': 12, 'part': 9191},
                            {'title': 12, 'part': 9292}],
         'citation': 'citation citation',
         'comments_close_on': None,
         'dates': 'date info',
         'document_number': '7878-111',
         'effective_on': '1956-09-09',
         'end_page': 9999,
         'full_text_xml_url': None,
         'html_url': 'some url',
         'publication_date': '1955-12-10',
         'regulation_id_numbers': ['a231a-232q'],
         'start_page': 8888,
         'type': 'Rule',
         'volume': 66,
     }

     built = build.build_notice('5', '9292', fr)
     self.assertEqual(built, [expected_notice])
 def test_build_notice(self):
     """Build a notice from a sample FR record and check the exact list
     that build_notice returns."""
     cfr_references = [
         {"title": 12, "part": 9191},
         {"title": 12, "part": 9292},
     ]
     fr = {
         "abstract": "sum sum sum",
         "action": "actact",
         "agency_names": ["Agency 1", "Agency 2"],
         "cfr_references": cfr_references,
         "citation": "citation citation",
         "comments_close_on": None,
         "dates": "date info",
         "document_number": "7878-111",
         "effective_on": "1956-09-09",
         "end_page": 9999,
         "full_text_xml_url": None,
         "html_url": "some url",
         "publication_date": "1955-12-10",
         "regulation_id_numbers": ["a231a-232q"],
         "start_page": 8888,
         "type": "Rule",
         "volume": 66,
     }
     built = build.build_notice("5", "9292", fr)

     expected_meta = {
         "dates": "date info",
         "end_page": 9999,
         "start_page": 8888,
         "type": "Rule",
     }
     expected_notice = {
         "abstract": "sum sum sum",
         "action": "actact",
         "agency_names": ["Agency 1", "Agency 2"],
         "cfr_parts": ["9191", "9292"],
         "cfr_title": "5",
         "document_number": "7878-111",
         "effective_on": "1956-09-09",
         "fr_citation": "citation citation",
         "fr_url": "some url",
         "fr_volume": 66,
         "initial_effective_on": "1956-09-09",
         "meta": expected_meta,
         "publication_date": "1955-12-10",
         "regulation_id_numbers": ["a231a-232q"],
     }
     self.assertEqual(built, [expected_notice])
示例#9
0
def first_notice_and_xml(title, part):
    """Find the first annual xml and its associated notice.

    Only final notices that have both a ``full_text_xml_url`` and an
    ``effective_on`` value are considered.  Returns a ``(notice, part_xml)``
    tuple for the earliest annual-edition year whose volume contains xml for
    ``part``, or ``None`` when no year qualifies.
    """
    notices = [build_notice(title, part, n, do_process_xml=False)
               for n in fetch_notice_json(title, part, only_final=True)
               if n['full_text_xml_url'] and n['effective_on']]
    modify_effective_dates(notices)

    notices = sorted(notices,
                     key=lambda n: (n['effective_on'], n['publication_date']))

    # Notices are visited in sorted order, so when several map to the same
    # edition year the last one visited is the one kept.
    years = {}
    for n in notices:
        year = annual_edition_for(title, n)
        years[year] = n

    # items() works on Python 2 and 3 alike; iteritems() is Python 2 only and
    # sorted() materialises a list in either case.
    for year, notice in sorted(years.items()):
        volume = find_volume(year, title, part)
        if volume:
            part_xml = volume.find_part_xml(part)
            if part_xml is not None:
                return (notice, part_xml)
    return None
 def test_build_notice(self):
     """Build a notice from an FR record without cfr_references and check
     the exact list that build_notice returns."""
     expected_notice = {
         'abstract': 'sum sum sum',
         'action': 'actact',
         'agency_names': ['Agency 1', 'Agency 2'],
         'cfr_part': '9292',
         'cfr_title': '5',
         'document_number': '7878-111',
         'effective_on': '1956-09-09',
         'fr_citation': 'citation citation',
         'fr_url': 'some url',
         'fr_volume': 66,
         'initial_effective_on': '1956-09-09',
         'meta': {
             'dates': 'date info',
             'end_page': 9999,
             'start_page': 8888,
             'type': 'Rule'
         },
         'publication_date': '1955-12-10',
         'regulation_id_numbers': ['a231a-232q'],
     }
     fr = {
         'abstract': 'sum sum sum',
         'action': 'actact',
         'agency_names': ['Agency 1', 'Agency 2'],
         'citation': 'citation citation',
         'comments_close_on': None,
         'dates': 'date info',
         'document_number': '7878-111',
         'effective_on': '1956-09-09',
         'end_page': 9999,
         'full_text_xml_url': None,
         'html_url': 'some url',
         'publication_date': '1955-12-10',
         'regulation_id_numbers': ['a231a-232q'],
         'start_page': 8888,
         'type': 'Rule',
         'volume': 66,
     }

     built = build.build_notice('5', '9292', fr)
     self.assertEqual(built, [expected_notice])
 def test_build_notice(self):
     """Build a notice from a sample FR record and compare it field by
     field, ignoring the order of cfr_parts."""
     expected = {
         'cfr_parts': ['9191', '9292'],
         'cfr_title': '5',
         'document_number': '7878-111',
         'effective_on': '1956-09-09',
         'fr_citation': 'citation citation',
         'fr_url': 'some url',
         'fr_volume': 66,
         'initial_effective_on': '1956-09-09',
         'meta': {
             'dates': 'date info',
             'end_page': 9999,
             'start_page': 8888,
             'type': 'Rule'
         },
         'publication_date': '1955-12-10',
         'regulation_id_numbers': ['a231a-232q'],
     }
     cfr_references = [{'title': 12, 'part': 9191},
                       {'title': 12, 'part': 9292}]
     fr = {
         'cfr_references': cfr_references,
         'citation': 'citation citation',
         'comments_close_on': None,
         'dates': 'date info',
         'document_number': '7878-111',
         'effective_on': '1956-09-09',
         'end_page': 9999,
         'full_text_xml_url': None,
         'html_url': 'some url',
         'publication_date': '1955-12-10',
         'regulation_id_numbers': ['a231a-232q'],
         'start_page': 8888,
         'type': 'Rule',
         'volume': 66,
     }

     built = build.build_notice('5', '9292', fr)
     self.assertEqual(1, len(built))

     only_notice = built[0]
     # Normalise list ordering before comparing against the sorted literal.
     only_notice['cfr_parts'] = sorted(only_notice['cfr_parts'])
     self.assertEqual(only_notice, expected)
示例#12
0
def fetch_sxs(document_number):
    """Fetch and parse Section-by-Section analyses.

    DOCUMENT_NUMBER is the identifier associated with a final rule. If a rule
    has been split, use the split identifiers, a.k.a. version ids"""
    target = entry.SxS(document_number)
    source = entry.Notice(document_number)

    # Record that the SxS entry is derived from the notice entry, then
    # validate that dependency.
    graph = dependency.Graph()
    graph.add(target, source)
    graph.validate_for(target)
    # Staleness is deliberately not checked: when a specific file is
    # requested we always want to execute.

    # @todo - break apart processing of SxS. We don't need all of the other
    # fields
    parsed = source.read()
    fields = meta_data(document_number, FULL_NOTICE_FIELDS)
    built = build_notice(parsed.cfr_titles[0], None, fields,
                         xml_to_process=parsed.xml)
    # Only the first built notice is written out.
    target.write(built[0])
def fetch_sxs(document_number):
    """Fetch and parse Section-by-Section analyses.

    DOCUMENT_NUMBER is the identifier associated with a final rule. If a rule
    has been split, use the split identifiers, a.k.a. version ids"""
    output_entry = entry.SxS(document_number)
    input_entry = entry.Notice(document_number)

    dependencies = dependency.Graph()
    dependencies.add(output_entry, input_entry)

    # No staleness check on purpose: a request for a specific file should
    # always be executed.
    dependencies.validate_for(output_entry)

    # @todo - break apart processing of SxS. We don't need all of the other
    # fields
    xml_wrapper = input_entry.read()
    metadata = meta_data(document_number, FULL_NOTICE_FIELDS)
    # The title of the first CFR reference is used; the part is passed as None.
    all_built = build_notice(xml_wrapper.cfr_refs[0].title, None, metadata,
                             xml_to_process=xml_wrapper.xml)
    output_entry.write(all_built[0])
示例#14
0
def first_notice_and_xml(title, part):
    """Find the first annual xml and its associated notice.

    Final notices lacking a ``full_text_xml_url`` or an ``effective_on``
    value are skipped.  The remaining notices are grouped by annual-edition
    year and the years are tried in ascending order; the first one whose
    volume has xml for ``part`` yields the returned ``(notice, part_xml)``
    tuple.  ``None`` is returned when no year has usable xml.
    """
    notices = [
        build_notice(title, part, n, do_process_xml=False)
        for n in fetch_notice_json(title, part, only_final=True)
        if n['full_text_xml_url'] and n['effective_on']
    ]
    modify_effective_dates(notices)

    notices = sorted(notices,
                     key=lambda n: (n['effective_on'], n['publication_date']))

    # Later notices in the sorted order replace earlier ones that map to the
    # same edition year.
    years = {}
    for n in notices:
        year = annual_edition_for(title, n)
        years[year] = n

    # dict.iteritems() does not exist on Python 3; items() behaves the same
    # here on both major versions because sorted() copies into a list anyway.
    for year, notice in sorted(years.items()):
        volume = find_volume(year, title, part)
        if volume:
            part_xml = volume.find_part_xml(part)
            if part_xml is not None:
                return (notice, part_xml)
    return None
def fetch_notices(cfr_title, cfr_part, only_final=False):
    """Fetch the notice JSON for this title/part and build notice objects.

    Every search result is passed through ``build_notice`` (which yields
    zero or more notices per result) and the outputs are flattened into one
    list."""
    return [
        notice
        for result in fetch_notice_json(cfr_title, cfr_part, only_final)
        for notice in build_notice(cfr_title, cfr_part, result)
    ]
示例#16
0
def fetch_notices(cfr_title, cfr_part, only_final=False):
    """Return the notices built from every search result for this
    title/part, concatenated in result order."""
    collected = []
    search_results = fetch_notice_json(cfr_title, cfr_part, only_final)
    for raw_notice in search_results:
        # build_notice returns an iterable of notices for a single result.
        collected += build_notice(cfr_title, cfr_part, raw_notice)
    return collected
示例#17
0
    def test_build_notice_override_fr(self):
        """ Test that the FR_NOTICE_OVERRIDES setting can override the
        'dates' value from build_notice """
        fr = {
            'abstract': 'sum sum sum',
            'action': 'actact',
            'agency_names': ['Agency 1', 'Agency 2'],
            'cfr_references': [{
                'title': 12,
                'part': 9191
            }, {
                'title': 12,
                'part': 9292
            }],
            'citation': 'citation citation',
            'comments_close_on': None,
            'dates': 'date info',
            'document_number': '7878-111',
            'effective_on': '1956-09-09',
            'end_page': 9999,
            'full_text_xml_url': None,
            'html_url': 'some url',
            'publication_date': '1955-12-10',
            'regulation_id_numbers': ['a231a-232q'],
            'start_page': 8888,
            'type': 'Rule',
            'volume': 66,
        }
        expected = [{
            'abstract': 'sum sum sum',
            'action': 'actact',
            'agency_names': ['Agency 1', 'Agency 2'],
            'cfr_parts': ['9191', '9292'],
            'cfr_part': '9292',
            'cfr_title': '5',
            'document_number': '7878-111',
            'effective_on': '1956-09-09',
            'fr_citation': 'citation citation',
            'fr_url': 'some url',
            'fr_volume': 66,
            'initial_effective_on': '1956-09-09',
            'meta': {
                'dates': 'new date info',
                'end_page': 9999,
                'start_page': 8888,
                'type': 'Rule'
            },
            'publication_date': '1955-12-10',
            'regulation_id_numbers': ['a231a-232q'],
        }]

        # Set our override value. The overrides mapping is module-level state
        # on build.settings, so keep track of the prior entry and restore it
        # afterwards; otherwise the override would still be in place for any
        # later test that builds document '7878-111'.
        # NOTE(review): assumes FR_NOTICE_OVERRIDES is a plain dict - confirm.
        overrides = build.settings.FR_NOTICE_OVERRIDES
        had_previous = '7878-111' in overrides
        previous = overrides.get('7878-111')
        overrides['7878-111'] = {
            'dates': 'new date info',
        }

        try:
            self.assertEqual(build.build_notice('5', '9292', fr), expected)
        finally:
            # Runs on success and on assertion failure alike.
            if had_previous:
                overrides['7878-111'] = previous
            else:
                overrides.pop('7878-111', None)