def test_validate_keyterms(self):
        """Check the events ``validate_keyterms`` records for repeated keyterms.

        The fixture holds one paragraph whose content starts with its full
        keyterm title and one whose content starts with part of its title.
        Three events are expected, in order: one ERROR and two WARNINGs.
        """
        section_xml = """
        <section xmlns="eregs" >
          <paragraph>
            <title type="keyterm">A Keyterm.</title>
            <content>A Keyterm. This paragraph should error.</content>
          </paragraph>
          <paragraph>
            <title type="keyterm">Another Keyterm.</title>
            <content>Keyterm. Fragment This one should warn.</content>
          </paragraph>
        </section>
        """
        tree = etree.fromstring(section_xml)
        validator = EregsValidator(settings.XSD_FILE)
        validator.validate_keyterms(tree)

        # (severity, text that must appear in the message), in event order.
        expected_events = [
            (Severity.ERROR, 'Duplicate keyterm'),
            (Severity.WARNING, 'keyterm fragment'),
            (Severity.WARNING, 'repeating keyterms'),
        ]

        self.assertEqual(len(validator.events), 3)

        for event, (severity, fragment) in zip(validator.events,
                                               expected_events):
            self.assertEqual(event.severity, severity)
            # assertIn reports both operands when the check fails.
            self.assertIn(fragment, event.msg)
    def test_invalid_content_is_not_valid_and_has_critical_errors(self):
        xml = etree.fromstring("""<regulation xmlns="eregs">
  <fdsys>
    <cfrTitleNum>this-should-be-a-number</cfrTitleNum>
    <cfrTitleText>TESTING</cfrTitleText>
    <volume>99</volume>
    <date>2017-01-01</date>
    <originalDate>2017-01-01</originalDate>
    <title>TEST</title>
  </fdsys>
  <preamble>
    <agency>Bureau of Consumer Financial Protection</agency>
    <regLetter>TEST</regLetter>
    <cfr>
      <title>123</title>
      <section>4567</section>
    </cfr>
    <documentNumber>9999-99999</documentNumber>
    <effectiveDate>2017-01-01</effectiveDate>
    <federalRegisterURL>https://www.foo.gov/testing</federalRegisterURL>
  </preamble>
  <part label="9999">
    <content></content>
  </part>
</regulation>""")
        validator = EregsValidator(settings.XSD_FILE)
        validator.validate_reg(xml)
        self.assertFalse(validator.is_valid)
        self.assertTrue(validator.has_critical_errors)
    def test_migrate_analysis_notice(self):
        """Analysis inside an added paragraph of a notice moves under <notice>.

        After migration the analysis section is expected to carry the
        paragraph label as ``target`` and the notice's document number as
        ``notice``.
        """
        notice_xml = """
            <notice xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="eregs ../../eregs.xsd">
              <fdsys>
                <date>2015-11-17</date>
              </fdsys>
              <preamble>
                <documentNumber>2015-12345</documentNumber>
              </preamble>
              <changeset>
                <change operation="added" label="1234-5">
                  <paragraph label="1234-5">
                    <content/>
                    <analysis>
                      <analysisSection>Some addedanalysis</analysisSection>
                    </analysis>
                  </paragraph>
                </change>
              </changeset>
            </notice>"""
        source_tree = etree.fromstring(notice_xml)
        migrated = EregsValidator(settings.XSD_FILE).migrate_analysis(
            source_tree)

        top_analysis = migrated.find(".//{eregs}analysis")
        # len() of an lxml element is its number of child elements.
        self.assertEqual(len(top_analysis), 1)

        section = top_analysis.find("{eregs}analysisSection")

        self.assertEqual(top_analysis.getparent().tag, "{eregs}notice")
        self.assertEqual(section.get("target"), "1234-5")
        self.assertEqual(section.get("notice"), "2015-12345")
    def test_migrate_analysis_reg(self):
        """Analysis nested in a regulation section moves under <regulation>.

        After migration the analysis section is expected to carry the
        section label as ``target`` and the document number as ``notice``.
        """
        regulation_xml = """
            <regulation xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="eregs ../../eregs.xsd">
              <fdsys>
                <date>2015-11-17</date>
              </fdsys>
              <preamble>
                <documentNumber>2015-12345</documentNumber>
              </preamble>
              <part label="1234">
                <subpart>
                  <section label="1234-1">
                    <analysis>
                      <analysisSection>Some analysis</analysisSection>
                    </analysis>
                  </section>
                </subpart>
              </part>
            </regulation>"""
        source_tree = etree.fromstring(regulation_xml)
        migrated = EregsValidator(settings.XSD_FILE).migrate_analysis(
            source_tree)

        top_analysis = migrated.find(".//{eregs}analysis")
        # len() of an lxml element is its number of child elements.
        self.assertEqual(len(top_analysis), 1)

        section = top_analysis.find("{eregs}analysisSection")

        self.assertEqual(top_analysis.getparent().tag, "{eregs}regulation")
        self.assertEqual(section.get("target"), "1234-1")
        self.assertEqual(section.get("notice"), "2015-12345")
    def test_validate_keyterms(self):
        """Check the events ``validate_keyterms`` records for repeated keyterms.

        The fixture holds one paragraph whose content starts with its full
        keyterm title and one whose content starts with part of its title.
        Three events are expected, in order: one ERROR and two WARNINGs.
        """
        section_xml = """
        <section xmlns="eregs" >
          <paragraph>
            <title type="keyterm">A Keyterm.</title>
            <content>A Keyterm. This paragraph should error.</content>
          </paragraph>
          <paragraph>
            <title type="keyterm">Another Keyterm.</title>
            <content>Keyterm. Fragment This one should warn.</content>
          </paragraph>
        </section>
        """
        tree = etree.fromstring(section_xml)
        validator = EregsValidator(settings.XSD_FILE)
        validator.validate_keyterms(tree)

        self.assertEqual(len(validator.events), 3)

        # Safe to unpack: the length was asserted just above.
        duplicate, fragment, repeating = validator.events

        # assertIn reports both operands when the check fails, unlike
        # assertTrue(a in b).
        self.assertEqual(duplicate.severity, Severity.ERROR)
        self.assertIn("Duplicate keyterm", duplicate.msg)

        self.assertEqual(fragment.severity, Severity.WARNING)
        self.assertIn("keyterm fragment", fragment.msg)

        self.assertEqual(repeating.severity, Severity.WARNING)
        self.assertIn("repeating keyterms", repeating.msg)
    def test_migrate_analysis_change_analysis_only(self):
        """A change that holds only analysis is migrated and then removed.

        The analysis section is expected under <notice> with ``target``
        matching the change's ``parent`` attribute in the fixture, and no
        <change> elements should remain afterwards.
        """
        notice_xml = """
            <notice xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="eregs ../../eregs.xsd">
              <fdsys>
                <date>2015-11-17</date>
              </fdsys>
              <preamble>
                <documentNumber>2015-12345</documentNumber>
              </preamble>
              <changeset>
                <change operation="added" label="1234-2-Analysis" parent="1234-2">
                  <analysis label="1234-Analysis">
                    <analysisSection>An added analysis</analysisSection>
                  </analysis>
                </change>
              </changeset>
            </notice>"""
        source_tree = etree.fromstring(notice_xml)
        migrated = EregsValidator(settings.XSD_FILE).migrate_analysis(
            source_tree)

        top_analysis = migrated.find('.//{eregs}analysis')
        # len() of an lxml element is its number of child elements.
        self.assertEqual(len(top_analysis), 1)

        section = top_analysis.find('{eregs}analysisSection')

        self.assertEqual(top_analysis.getparent().tag, '{eregs}notice')
        self.assertEqual(section.get('target'), '1234-2')
        self.assertEqual(section.get('notice'), '2015-12345')

        # The change was left empty by the migration, so it should be gone.
        remaining_changes = migrated.findall('.//{eregs}change')
        self.assertEqual(len(remaining_changes), 0)
    def test_migrate_analysis_reg(self):
        """Analysis nested in a regulation section moves under <regulation>.

        After migration the analysis section is expected to carry the
        section label as ``target`` and the document number as ``notice``.
        """
        regulation_xml = """
            <regulation xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="eregs ../../eregs.xsd">
              <fdsys>
                <date>2015-11-17</date>
              </fdsys>
              <preamble>
                <documentNumber>2015-12345</documentNumber>
              </preamble>
              <part label="1234">
                <subpart>
                  <section label="1234-1">
                    <analysis>
                      <analysisSection>Some analysis</analysisSection>
                    </analysis>
                  </section>
                </subpart>
              </part>
            </regulation>"""
        source_tree = etree.fromstring(regulation_xml)
        migrated = EregsValidator(settings.XSD_FILE).migrate_analysis(
            source_tree)

        top_analysis = migrated.find('.//{eregs}analysis')
        # len() of an lxml element is its number of child elements.
        self.assertEqual(len(top_analysis), 1)

        section = top_analysis.find('{eregs}analysisSection')

        self.assertEqual(top_analysis.getparent().tag, '{eregs}regulation')
        self.assertEqual(section.get('target'), '1234-1')
        self.assertEqual(section.get('notice'), '2015-12345')
Exemplo n.º 8
0
def get_validator(xml_tree):
    """Validate ``xml_tree`` against the configured XSD and return the validator.

    Args:
        xml_tree: parsed XML tree handed to ``EregsValidator.validate_reg``.

    Returns:
        The ``EregsValidator`` that performed the validation, when the tree
        is valid.

    Raises:
        SystemExit: with status 1, after printing every validation event,
            when the validator reports the tree as not valid.
    """
    # Validate the file relative to schema
    validator = EregsValidator(settings.XSD_FILE)
    validator.validate_reg(xml_tree)

    if validator.is_valid:
        return validator

    for event in validator.events:
        print(str(event))
    # A failed validation must not look like success to the calling shell,
    # so exit with a non-zero status.
    sys.exit(1)
Exemplo n.º 9
0
def get_validator(xml_tree, raise_instead_of_exiting=False):
    """Validate ``xml_tree`` against the configured XSD and return the validator.

    Args:
        xml_tree: parsed XML tree handed to ``EregsValidator.validate_reg``.
        raise_instead_of_exiting: when true, a failed validation raises the
            last validation event instead of terminating the process.

    Returns:
        The ``EregsValidator`` that performed the validation, when the tree
        is valid.

    Raises:
        SystemExit: with status 1 when validation fails and
            ``raise_instead_of_exiting`` is false.
        The last entry of ``validator.events`` when validation fails and
            ``raise_instead_of_exiting`` is true (presumably an exception
            type; confirm against the event class).
    """
    # Validate the file relative to schema
    validator = EregsValidator(settings.XSD_FILE)
    validator.validate_reg(xml_tree)

    if validator.is_valid:
        return validator

    for event in validator.events:
        print(str(event))

    if raise_instead_of_exiting:
        # Name the raised event explicitly rather than relying on the loop
        # variable still being bound after the loop.
        raise validator.events[-1]

    # A failed validation must not look like success to the calling shell,
    # so exit with a non-zero status.
    sys.exit(1)
Exemplo n.º 10
0
def _migrate_analysis_in_files(file_names):
    """Run the analysis migration, then validation, on each named file."""
    for file_name in file_names:
        print(file_name)
        with open(file_name, 'r') as f:
            reg_xml = f.read()
        parser = etree.XMLParser(huge_tree=True)
        xml_tree = etree.fromstring(reg_xml, parser)
        validator = EregsValidator(settings.XSD_FILE)
        validator.migrate_analysis(xml_tree, file_name)
        validator.validate_reg(xml_tree)


def migrate_analysis(cfr_title, cfr_part):
    """ Migrate analysis from its context to top-level

    Asks for confirmation on the console first, then processes every
    regulation file and every notice file that ``find_all`` returns for
    ``cfr_part``.

    Args:
        cfr_title: accepted for interface compatibility; not used here.
        cfr_part: passed to ``find_all`` to locate regulation and notice
            files.

    Returns:
        None. Returns early, without touching any file, unless the user
        answers ``y``.
    """

    # Prompt user to be sure they want to do this
    print(colored('This will irrevocably modify all regulation and notice files for this regulation. '
                  'Is this ok?', 'red'))

    # raw_input only exists on Python 2; on Python 3 the equivalent is input.
    try:
        read_answer = raw_input
    except NameError:
        read_answer = input

    answer = None
    while answer not in ['y', 'n']:
        answer = read_answer('Migrate all analysis? y/n: ')
    if answer != 'y':
        return

    # Migrate regulation files
    _migrate_analysis_in_files(find_all(cfr_part))

    # Migrate notices
    _migrate_analysis_in_files(find_all(cfr_part, is_notice=True))
 def test_is_not_valid_with_any_errors(self):
     """A validator holding a single INFO event reports ``is_valid`` false."""
     checker = EregsValidator(settings.XSD_FILE)
     info_event = EregsValidationEvent('test', Severity.INFO)
     checker.events = [info_event]
     self.assertFalse(checker.is_valid)
 def test_load_schema_raises_if_remote_schema_does_not_exist(self):
     """Loading a schema from an unreachable URL raises a schema parse error."""
     missing_url = 'http://some.non/existent.url'
     with self.assertRaises(etree.XMLSchemaParseError):
         checker = EregsValidator(missing_url)
         checker.load_schema()
 def test_load_schema_raises_if_local_schema_does_not_exist(self):
     """Loading a schema from a missing local path raises a schema parse error."""
     missing_path = '/some/non/existent/path'
     with self.assertRaises(etree.XMLSchemaParseError):
         checker = EregsValidator(missing_path)
         checker.load_schema()
 def test_load_schema_works_if_schema_is_local(self):
     """Loading the configured local XSD must not raise a schema parse error."""
     try:
         checker = EregsValidator(settings.XSD_FILE)
         checker.load_schema()
     except etree.XMLSchemaParseError:
         # Report a test failure rather than an error.
         self.fail('schema should load successfully from local path')
 def test_has_critical_errors_true_with_critical_errors(self):
     """A CRITICAL event makes ``has_critical_errors`` true."""
     checker = EregsValidator(settings.XSD_FILE)
     critical_event = EregsValidationEvent('test', Severity.CRITICAL)
     checker.events = [critical_event]
     self.assertTrue(checker.has_critical_errors)
 def test_has_critical_errors_false_with_non_severe_errors(self):
     """An INFO-only event list leaves ``has_critical_errors`` false."""
     checker = EregsValidator(settings.XSD_FILE)
     info_event = EregsValidationEvent('test', Severity.INFO)
     checker.events = [info_event]
     self.assertFalse(checker.has_critical_errors)
 def test_has_critical_errors_false_with_no_errors(self):
     """A freshly constructed validator has no critical errors."""
     fresh_checker = EregsValidator(settings.XSD_FILE)
     critical = fresh_checker.has_critical_errors
     self.assertFalse(critical)
def parser_driver(regulation_file,
                  check_terms=False,
                  correct_interps=False,
                  headerize_interps=False,
                  fix_missed_cites=False):
    """Parse a regulation XML file and write its tree, layers and notice.

    The file is validated against the configured XSD first. If it is valid,
    the regulation tree and every layer are built from the XML and handed to
    ``write_layer`` under the regulation number and notice/version id.

    Args:
        regulation_file: path of the regulation XML file to read. Its base
            name without ``.xml`` is used as the version.
        check_terms: when true, also run
            ``validator.validate_term_references``.
        correct_interps: when true, call
            ``validator.insert_interp_markers`` before building layers.
        headerize_interps: when true, call ``validator.headerize_interps``
            before building layers.
        fix_missed_cites: when true, call ``validator.fix_omitted_cites``
            before building layers.

    Raises:
        SystemExit: with status 1, after printing the validation events,
            when the file is not valid against the schema.
    """
    with open(regulation_file, 'r') as f:
        reg_xml = f.read()
    xml_tree = etree.fromstring(reg_xml)

    # validate relative to schema
    validator = EregsValidator(settings.XSD_FILE)
    validator.validate_reg(xml_tree)

    if not validator.is_valid:
        for event in validator.events:
            print(str(event))
        # A failed validation must not look like success to the calling
        # shell, so exit with a non-zero status.
        sys.exit(1)

    reg_tree = build_reg_tree(xml_tree)
    reg_number = reg_tree.label[0]
    # we can correct interps right away if necessary
    if correct_interps:
        validator.insert_interp_markers(xml_tree, regulation_file)
    if headerize_interps:
        validator.headerize_interps(xml_tree, regulation_file)
    if fix_missed_cites:
        validator.fix_omitted_cites(xml_tree, regulation_file)

    paragraph_markers = build_paragraph_marker_layer(xml_tree)
    internal_citations = build_internal_citations_layer(xml_tree)
    external_citations = build_external_citations_layer(xml_tree)
    terms = build_terms_layer(xml_tree)
    meta = build_meta_layer(xml_tree)
    toc = build_toc_layer(xml_tree)
    keyterms = build_keyterm_layer(xml_tree)
    graphics = build_graphics_layer(xml_tree)
    formatting = build_formatting_layer(xml_tree)
    interps = build_interp_layer(xml_tree)
    analysis = build_analysis(xml_tree)
    notice_dict = build_notice(xml_tree)

    # Run the content-level checks and report whatever the validator has
    # recorded; processing continues regardless of what is reported here.
    validator.validate_terms(xml_tree, terms)
    validator.validate_internal_cites(xml_tree, internal_citations)
    if check_terms:
        validator.validate_term_references(xml_tree, terms, regulation_file)
    for event in validator.events:
        print(str(event))

    reg_tree.include_children = True
    reg_json = reg_tree.to_json()

    # The file name wins over the document number found inside the XML.
    notice = xml_tree.find('.//{eregs}documentNumber').text
    version = os.path.split(regulation_file)[-1].replace('.xml', '')
    if notice != version:
        print('Notice ({}) different from version ({}), using version'.format(notice, version))
        notice = version

    # (payload, layer type) pairs, written in this order.
    outputs = (
        (reg_json, 'regulation'),
        (meta, 'layer/meta'),
        (paragraph_markers, 'layer/paragraph-markers'),
        (internal_citations, 'layer/internal-citations'),
        (external_citations, 'layer/external-citations'),
        (terms, 'layer/terms'),
        (toc, 'layer/toc'),
        (keyterms, 'layer/keyterms'),
        (graphics, 'layer/graphics'),
        (formatting, 'layer/formatting'),
        (interps, 'layer/interpretations'),
        (analysis, 'layer/analyses'),
        (notice_dict, 'notice'),
    )
    for payload, layer_type in outputs:
        write_layer(payload, reg_number, notice, layer_type)
 def test_is_valid_with_no_errors(self):
     """A freshly constructed validator reports ``is_valid`` true."""
     fresh_checker = EregsValidator(settings.XSD_FILE)
     valid = fresh_checker.is_valid
     self.assertTrue(valid)