def test_build_from_section_extract():
    """Paragraphs nested in an EXTRACT tag become children of an extract node."""
    with section_ctx() as ctx:
        ctx.P("(a) aaaa")
        with ctx.EXTRACT():
            ctx.P("1. Some content")
            ctx.P("2. Other content")
            ctx.P("(3) This paragraph has parens for some reason")
    root = NodeAccessor(section.build_from_section('8675', ctx.xml)[0])

    # The section node itself
    assert root.label == ['8675', '309']
    assert root.child_labels == ['a']
    assert root.text == ''
    assert root.node_type == 'regtext'

    # Paragraph (a) owns the extract
    par_a = root['a']
    assert par_a.text == '(a) aaaa'
    assert par_a.node_type == 'regtext'
    assert par_a.child_labels == ['p1']

    # The extract wraps three plain regtext paragraphs
    extract = par_a['p1']
    assert extract.text == ''
    assert extract.node_type == 'extract'
    assert extract.child_labels == ['p1', 'p2', 'p3']
    for paragraph in extract.children:
        assert paragraph.node_type == 'regtext'
    assert extract['p1'].text == "1. Some content"
    assert extract['p2'].text == "2. Other content"
    assert extract['p3'].text == (
        "(3) This paragraph has parens for some reason")
def test_build_from_4_section_reserved_range():
    """The first node built from a four-section reserved range is labelled
    with the first section and titled with the (shortened) range."""
    with XMLBuilder("SECTION") as ctx:
        ctx.SECTNO("§§ 8675.309-8675.312")
        ctx.RESERVED("[Reserved]")
    built = section.build_from_section('8675', ctx.xml)
    first = built[0]

    assert first.label == ['8675', '309']
    assert first.title == '§§ 8675.309-312 [Reserved]'
def test_build_from_section_example():
    """Paragraphs nested in EXAMPLE tags are grouped under one node per tag."""
    example_intros = ("You need a form if:", "You do not need a form if:")
    with section_ctx() as ctx:
        ctx.P("(a) aaaa")
        # Two consecutive EXAMPLE blocks that differ only in their first line
        for intro in example_intros:
            with ctx.EXAMPLE():
                ctx.P(intro)
                ctx.P("1. Some content")
                ctx.P("2. Other content")
    root = NodeAccessor(section.build_from_section('8675', ctx.xml)[0])

    assert root.child_labels == ['a']
    par_a = root['a']
    assert par_a.text == '(a) aaaa'
    assert par_a.child_labels == ['p1', 'p2']

    for example_label, intro in zip(('p1', 'p2'), example_intros):
        example = par_a[example_label]
        assert example.text == ''
        assert example.child_labels == ['p1', 'p2', 'p3']
        assert example['p1'].text == intro
        assert example['p2'].text == '1. Some content'
        assert example['p3'].text == '2. Other content'
def test_build_from_section_reserved():
    """A single reserved section yields a titled node without children."""
    with XMLBuilder("SECTION") as ctx:
        ctx.SECTNO("§ 8675.309")
        ctx.RESERVED("[Reserved]")
    reserved = section.build_from_section('8675', ctx.xml)[0]

    assert reserved.label == ['8675', '309']
    assert reserved.title == '§ 8675.309 [Reserved]'
    assert reserved.children == []
def test_build_from_section_bad_spaces():
    """Spaces missing around an emphasized keyterm show up in the node text."""
    with section_ctx(section=16) as ctx:
        ctx.STARS()
        ctx.child_from_string(
            '<P>(b)<E T="03">General.</E>Content Content.</P>')
    root = NodeAccessor(section.build_from_section('8675', ctx.xml)[0])

    assert root.label == ['8675', '16']
    assert root.child_labels == ['b']
    assert root['b'].text == "(b) General. Content Content."
def test_build_from_section_double_alpha():
    """Doubled-letter markers such as (aa) are recognised as paragraph labels.

    This covers hierarchies like (x), (y), (z), (aa), (bb)…
    """
    with XMLBuilder("SECTION") as ctx:
        ctx.SECTNO("§ 8675.309")
        ctx.SUBJECT("Definitions.")
        ctx.P("(aa) This is what things mean:")
    section_node = section.build_from_section('8675', ctx.xml)[0]
    first_child = section_node.children[0]

    assert first_child.text == '(aa) This is what things mean:'
    assert first_child.label == ['8675', '309', 'aa']
def _setup_for_ambiguous(final_par):
    """Build a section whose "(i)" paragraph could sit at either of two depths.

    :param final_par: text of the last paragraph, appended after the
        ambiguous "(i)" paragraph.
    :returns: a NodeAccessor wrapping the first node built from the section.
    """
    paragraph_texts = (
        "(g) Some Content",
        "(h) H Starts",
        "(1) H-1",
        "(2) H-2",
        "(i) Is this 8675-309-h-2-i or 8675-309-i",
        final_par,
    )
    with section_ctx() as ctx:
        for text in paragraph_texts:
            ctx.P(text)
    built = section.build_from_section('8675', ctx.xml)
    return NodeAccessor(built[0])
def test_build_from_section_double_collapsed():
    """Two markers collapsed after a keyterm, "(1)(i)", expand into two
    nested levels."""
    with section_ctx() as ctx:
        ctx.child_from_string(
            '<P>(a) <E T="03">Keyterm</E>—(1)(i) Content</P>')
        ctx.P("(ii) Content2")
    root = NodeAccessor(section.build_from_section('8675', ctx.xml)[0])

    assert root.child_labels == ['a']
    par_a = root['a']
    assert par_a.child_labels == ['1']
    assert par_a['1'].child_labels == ['i', 'ii']
def build_subpart(cfr_part, xml):
    """Build a subpart node and populate it from the SECTION children of xml.

    :param cfr_part: part identifier passed to both the subpart builder and
        each section build.
    :param xml: element whose direct SECTION children (``./SECTION``) are
        parsed.
    :returns: the subpart node, with ``children`` set to the concatenation
        of the nodes built from every SECTION, in document order.
    """
    title = get_subpart_group_title(xml)
    subpart = reg_text.build_subpart(title, cfr_part)
    # Each SECTION may produce several nodes; flatten them into one list
    subpart.children = [
        node
        for section_xml in xml.xpath('./SECTION')
        for node in build_from_section(cfr_part, section_xml)
    ]
    return subpart
def test_build_from_3_section_reserved_range():
    """A three-section reserved range expands into one node per section."""
    with XMLBuilder("SECTION") as ctx:
        ctx.SECTNO("§§ 8675.309-8675.311")
        ctx.RESERVED("[Reserved]")
    # Unpacking doubles as a check that exactly three nodes were built
    n309, n310, n311 = section.build_from_section('8675', ctx.xml)

    expectations = (
        (n309, ['8675', '309'], '§ 8675.309 [Reserved]'),
        (n310, ['8675', '310'], '§ 8675.310 [Reserved]'),
        (n311, ['8675', '311'], '§ 8675.311 [Reserved]'),
    )
    for built, expected_label, _ in expectations:
        assert built.label == expected_label
    for built, _, expected_title in expectations:
        assert built.title == expected_title
def test_build_from_section_intro_text():
    """An unmarked leading paragraph becomes the section node's own text."""
    with section_ctx() as ctx:
        ctx.P("Some content about this section.")
        ctx.P("(a) something something")
    root = NodeAccessor(section.build_from_section('8675', ctx.xml)[0])

    assert root.text == 'Some content about this section.'
    assert root.child_labels == ['a']
    par_a = root['a']
    assert par_a.text == '(a) something something'
    assert par_a.children == []
def test_build_from_section_collapsed():
    """Several markers inside one paragraph unfold into nested levels."""
    with section_ctx() as ctx:
        ctx.P("(a) aaa")
        ctx.P("(1) 111")
        ctx.child_from_string('<P>(2) 222—(i) iii. (A) AAA</P>')
        ctx.P("(B) BBB")
    root = NodeAccessor(section.build_from_section('8675', ctx.xml)[0])

    assert root.child_labels == ['a']
    par_a = root['a']
    assert par_a.child_labels == ['1', '2']
    par_a2 = par_a['2']
    assert par_a2.child_labels == ['i']
    assert par_a2['i'].child_labels == ['A', 'B']
def test_build_from_section_whitespace():
    """Leading and trailing whitespace is stripped from the section's intro
    text; interior whitespace is left as-is."""
    with XMLBuilder("SECTION", "\n\n") as ctx:
        ctx.SECTNO("§ 8675.309")
        ctx.SUBJECT("subsubsub")
        ctx.P(" Some \n content\n")
        ctx.P("(a) aaa")
        ctx.P("(b) bbb")
    section_node = section.build_from_section('8675', ctx.xml)[0]

    assert section_node.text == "Some \n content"
def test_build_from_section_collapsed_level():
    """A "(1)" collapsed behind a keyterm starts a level that the following
    "(2)" paragraph continues."""
    with section_ctx() as ctx:
        ctx.child_from_string(
            '<P>(a) <E T="03">Transfers </E>—(1) <E T="03">Notice.</E> '
            'follow</P>')
        ctx.P("(2) More text")
        ctx.child_from_string('<P>(b) <E T="03">Contents</E> (1) Here</P>')
        ctx.P("(2) More text")
    root = NodeAccessor(section.build_from_section('8675', ctx.xml)[0])

    assert root.child_labels == ['a', 'b']
    for top_level in ('a', 'b'):
        assert root[top_level].child_labels == ['1', '2']
def test_build_from_section_collapsed_level_emph():
    """An emphasized "(1)" collapsed into its parent paragraph is split off
    as a child node."""
    with section_ctx() as ctx:
        ctx.P("(a) aaaa")
        ctx.P("(1) 1111")
        ctx.P("(i) iiii")
        ctx.child_from_string('<P>(A) AAA—(<E T="03">1</E>) eeee</P>')
        ctx.STARS()
    root = NodeAccessor(section.build_from_section('8675', ctx.xml)[0])

    # Walk down to the paragraph that carried the collapsed marker
    collapsed_parent = root['a']['1']['i']['A']
    assert collapsed_parent.text == "(A) AAA—"
    assert collapsed_parent.child_labels == ['1']
    assert collapsed_parent['1'].text == "(1) eeee"
def test_build_from_section_notes():
    """Paragraphs nested in a NOTES tag hang off a note-typed node."""
    with section_ctx() as ctx:
        ctx.P("(a) aaaa")
        with ctx.NOTES():
            ctx.PRTPAGE(P="8")
            ctx.P("1. Some content")
            ctx.P("2. Other content")
    first_built = section.build_from_section('8675', ctx.xml)[0]
    root = NodeAccessor(first_built)

    assert root.child_labels == ['a']
    par_a = root['a']
    assert par_a.child_labels == ['p1']
    note = par_a['p1']
    assert note.node_type == Node.NOTE
    assert note.child_labels == ['1', '2']
def test_build_from_section_fp():
    """An FP paragraph is attached as a child of the paragraph before it."""
    with section_ctx() as ctx:
        ctx.P("(a) aaa")
        ctx.P("(b) bbb")
        ctx.FP("fpfpfp")
        ctx.P("(c) ccc")
    root = NodeAccessor(section.build_from_section('8675', ctx.xml)[0])

    assert root.child_labels == ['a', 'b', 'c']
    assert root['a'].child_labels == []
    par_b = root['b']
    assert par_b.child_labels == ['p1']
    assert par_b['p1'].child_labels == []
    assert root['c'].child_labels == []
def test_build_from_section_image():
    """Images (GPH/GID) are turned into a child node holding an image link."""
    with XMLBuilder("SECTION", "\n\n") as ctx:
        ctx.SECTNO("§ 8675.309")
        ctx.SUBJECT("subsubsub")
        ctx.P("(a) aaa")
        with ctx.GPH():
            ctx.GID("a-gid")
        ctx.P("(b) bbb")
    first_built = section.build_from_section('8675', ctx.xml)[0]
    root = NodeAccessor(first_built)

    assert root.child_labels == ['a', 'b']
    par_a = root['a']
    assert par_a.child_labels == ['p1']
    assert par_a['p1'].text == '![](a-gid)'
def build_subjgrp(reg_part, subjgrp_xml, letter_list):
    """Build a subject-group node and populate it from its SECTION children.

    This handles subjgrps that have been pulled out and injected into the
    same level as subparts.

    :param reg_part: part identifier passed to both the subject-group
        builder and each section build.
    :param subjgrp_xml: element whose direct SECTION children are parsed.
    :param letter_list: forwarded unchanged to ``reg_text.build_subjgrp``.
    :returns: the subject-group node, with ``children`` set to the
        concatenation of the nodes built from every SECTION child.
    """
    subjgrp_title = get_subpart_group_title(subjgrp_xml)
    # reg_text.build_subjgrp returns a pair; only the node (second item) is
    # needed here, so the first item is deliberately discarded.
    _, subjgrp = reg_text.build_subjgrp(subjgrp_title, reg_part, letter_list)
    sections = []
    # Iterate the element directly: Element.getchildren() is deprecated (and
    # absent from the standard-library ElementTree since Python 3.9), while
    # plain iteration yields the same direct children.
    for ch in subjgrp_xml:
        if ch.tag == 'SECTION':
            sections.extend(build_from_section(reg_part, ch))
    subjgrp.children = sections
    return subjgrp
def test_build_from_section_italic_levels():
    """Emphasized markers "(1)" and "(2)" nest beneath the "(A)" level."""
    with section_ctx() as ctx:
        ctx.P("(a) aaa")
        ctx.P("(1) 111")
        ctx.P("(i) iii")
        ctx.P("(A) AAA")
        ctx.child_from_string('<P>(<E T="03">1</E>) i1i1i1</P>')
        ctx.child_from_string('<P>\n(<E T="03">2</E>) i2i2i2</P>')
    root = NodeAccessor(section.build_from_section('8675', ctx.xml)[0])

    # Descend one level at a time, checking the labels seen at each depth
    assert root.child_labels == ['a']
    level_a = root['a']
    assert level_a.child_labels == ['1']
    level_1 = level_a['1']
    assert level_1.child_labels == ['i']
    level_i = level_1['i']
    assert level_i.child_labels == ['A']
    assert level_i['A'].child_labels == ['1', '2']
def test_build_from_section_table():
    """A GPOTABLE in regtext becomes a child node rendered as a pipe table."""
    with section_ctx() as ctx:
        ctx.P("(a) aaaa")
        with ctx.GPOTABLE(CDEF="s25,10", COLS=2, OPTS="L2,i1"):
            with ctx.BOXHD():
                ctx.CHED(H=1)
                ctx.CHED("Header", H=1)
            with ctx.ROW():
                ctx.ENT("Left content", I="01")
                ctx.ENT("Right content")
    root = NodeAccessor(section.build_from_section('8675', ctx.xml)[0])

    assert root.child_labels == ['a']
    par_a = root['a']
    assert par_a.child_labels == ['p1']
    table = par_a['p1']
    assert table.text == (
        "||Header|\n|---|---|\n|Left content|Right content|")
    assert table.source_xml.tag == 'GPOTABLE'
def test_build_from_section_extract_with_table():
    """A table inside an EXTRACT becomes a regtext child of the extract."""
    subject = "Table of distances for storage of low explosives."
    with XMLBuilder("SECTION") as ctx:
        ctx.SECTNO("§ 555.219")
        ctx.SUBJECT(subject)
        with ctx.EXTRACT():
            _add_table(ctx)
    root = NodeAccessor(section.build_from_section('555', ctx.xml)[0])

    # Section node
    assert root.title == '§ 555.219 ' + subject
    assert root.node_type == 'regtext'
    assert root.label == ['555', '219']
    assert root.child_labels == ['p1']

    # Extract wrapper
    extract = root['p1']
    assert extract.node_type == 'extract'
    assert extract.child_labels == ['p1']

    # The table itself
    table = extract['p1']
    assert table.node_type == 'regtext'
    assert table.source_xml.tag == 'GPOTABLE'
    assert table.tagged_text.startswith('<GPOTABLE')
# NOTE(review): a second function with this exact name is defined later in
# this file; the later definition rebinds the name, so only one of the two
# would be collected as a test. Confirm whether the duplicate is intended.
def test_build_from_section_extract_with_table_and_headers():
    """Headers around a table inside an EXTRACT each become their own node."""
    subject = 'Table of distances for storage of low explosives.'
    first_header = (
        "Table: Department of Defense Ammunition and Explosives Standards, "
        "Table 5-4.1 Extract; 4145.27 M, March 1969"
    )
    second_header = (
        "Table: National Fire Protection Association (NFPA) Official "
        "Standard No. 492, 1968"
    )
    with XMLBuilder("SECTION") as ctx:
        ctx.SECTNO("§ 555.219")
        ctx.SUBJECT(subject)
        with ctx.EXTRACT():
            ctx.HD(first_header, SOURCE='HD1')
            _add_table(ctx)
            ctx.HD(second_header, SOURCE='HD1')
    root = NodeAccessor(section.build_from_section('555', ctx.xml)[0])

    # Section node
    assert root.title == '§ 555.219 ' + subject
    assert root.node_type == 'regtext'
    assert root.label == ['555', '219']
    assert root.child_labels == ['p1']

    # Extract wrapper holding header, table, header
    extract = root['p1']
    assert extract.node_type == 'extract'
    assert extract.child_labels == ['p1', 'p2', 'p3']

    leading_header = extract['p1']
    assert leading_header.node_type == 'regtext'
    assert leading_header.text == ''
    assert leading_header.title == first_header
    assert leading_header.children == []

    table = extract['p2']
    assert table.node_type == 'regtext'
    assert table.source_xml.tag == 'GPOTABLE'
    assert table.children == []

    trailing_header = extract['p3']
    assert trailing_header.node_type == 'regtext'
    assert trailing_header.text == ''
    assert trailing_header.title == second_header
    assert trailing_header.children == []
def test_build_from_section_extract_with_table_and_headers():
    """An EXTRACT holding header, table, header yields three regtext nodes."""
    subject = 'Table of distances for storage of low explosives.'
    header_before_table = (
        "Table: Department of Defense Ammunition and Explosives Standards, "
        "Table 5-4.1 Extract; 4145.27 M, March 1969")
    header_after_table = (
        "Table: National Fire Protection Association (NFPA) Official "
        "Standard No. 492, 1968")
    with XMLBuilder("SECTION") as ctx:
        ctx.SECTNO("§ 555.219")
        ctx.SUBJECT(subject)
        with ctx.EXTRACT():
            ctx.HD(header_before_table, SOURCE='HD1')
            _add_table(ctx)
            ctx.HD(header_after_table, SOURCE='HD1')
    built = section.build_from_section('555', ctx.xml)
    root = NodeAccessor(built[0])

    assert root.title == '§ 555.219 ' + subject
    assert root.node_type == 'regtext'
    assert root.label == ['555', '219']
    assert root.child_labels == ['p1']

    extract = root['p1']
    assert extract.node_type == 'extract'
    assert extract.child_labels == ['p1', 'p2', 'p3']

    # First header node: title only, no text and no children
    assert extract['p1'].node_type == 'regtext'
    assert extract['p1'].text == ''
    assert extract['p1'].title == header_before_table
    assert extract['p1'].children == []

    # Table node sits between the two headers
    assert extract['p2'].node_type == 'regtext'
    assert extract['p2'].source_xml.tag == 'GPOTABLE'
    assert extract['p2'].children == []

    # Second header node mirrors the first
    assert extract['p3'].node_type == 'regtext'
    assert extract['p3'].text == ''
    assert extract['p3'].title == header_after_table
    assert extract['p3'].children == []
def parse_regtext():
    """Build section nodes and return the first one.

    ``label_parts`` and ``xml`` are not parameters; they are read from the
    surrounding scope. Returns None when the build yields nothing truthy.
    """
    built = build_from_section(label_parts[0], xml)
    return built[0] if built else None
def test_build_from_section_section_with_nondigits():
    """A section identifier containing a letter ("309a") is kept in the label."""
    with section_ctx(section="309a") as ctx:
        ctx.P("Intro content here")
    built = section.build_from_section('8675', ctx.xml)
    section_node = built[0]

    assert section_node.label == ['8675', '309a']
    assert section_node.children == []