def test_build_from_section_unnumbered_defs(self):
    """Unnumbered definition paragraphs should be labelled by their term."""
    section_xml = etree.fromstring(u"""
        <SECTION>
            <SECTNO>§ 8675.309</SECTNO>
            <SUBJECT>Definitions.</SUBJECT>
            <P depth="1">(a) This is what things mean:</P>
            <P depth="1">foo means bar</P>
            <P depth="1">bop means baz</P>
        </SECTION>
    """)
    section = reg_text.build_from_section('8675', section_xml)[0]

    self.assertEqual('', section.text.strip())
    self.assertEqual(3, len(section.children))
    self.assertEqual(['8675', '309'], section.label)

    # (expected text, expected last label component), in document order
    expectations = [
        ('(a) This is what things mean:', 'a'),
        ('foo means bar', 'Foo'),
        ('bop means baz', 'Bop'),
    ]
    for paragraph, (text, marker) in zip(section.children, expectations):
        self.assertEqual(text, paragraph.text.strip())
        self.assertEqual(0, len(paragraph.children))
        self.assertEqual(['8675', '309', marker], paragraph.label)
def test_build_from_section_unnumbered_defs(self):
    """Paragraphs without a marker should get a label from their term."""
    source = u"""
        <SECTION>
            <SECTNO>§ 8675.309</SECTNO>
            <SUBJECT>Definitions.</SUBJECT>
            <P>(a) This is what things mean:</P>
            <P>foo means bar</P>
            <P>bop means baz</P>
        </SECTION>
    """
    built = reg_text.build_from_section('8675', etree.fromstring(source))
    section = built[0]
    self.assertEqual('', section.text.strip())
    self.assertEqual(3, len(section.children))
    self.assertEqual(['8675', '309'], section.label)

    def check_leaf(index, text, label):
        # Each paragraph is expected to be a leaf with this text and label
        leaf = section.children[index]
        self.assertEqual(text, leaf.text.strip())
        self.assertEqual(0, len(leaf.children))
        self.assertEqual(label, leaf.label)

    check_leaf(0, '(a) This is what things mean:', ['8675', '309', 'a'])
    check_leaf(1, 'foo means bar', ['8675', '309', 'Foo'])
    check_leaf(2, 'bop means baz', ['8675', '309', 'Bop'])
def test_build_from_4_section_reserved_range(self):
    """The first node of a four-section reserved range keeps the range in
    its title."""
    with XMLBuilder("SECTION") as builder:
        builder.SECTNO(u"§§ 8675.309-8675.312")
        builder.RESERVED("[Reserved]")
    built = reg_text.build_from_section('8675', builder.xml)
    first = built[0]
    self.assertEqual(first.label, ['8675', '309'])
    self.assertEqual(u'§§ 8675.309-312 [Reserved]', first.title)
def test_build_from_section_reserved(self):
    """A reserved section should produce a titled node with no children."""
    with self.tree.builder("SECTION") as section:
        section.SECTNO(u"§ 8675.309")
        section.RESERVED("[Reserved]")
    built = reg_text.build_from_section('8675', self.tree.render_xml())
    reserved = built[0]
    self.assertEqual(reserved.label, ['8675', '309'])
    self.assertEqual(u'§ 8675.309 [Reserved]', reserved.title)
    self.assertEqual([], reserved.children)
def test_build_from_section_reserved(self):
    """A reserved section should produce a titled node with no children."""
    with XMLBuilder("SECTION") as builder:
        builder.SECTNO(u"§ 8675.309")
        builder.RESERVED("[Reserved]")
    built = reg_text.build_from_section('8675', builder.xml)
    reserved = built[0]
    self.assertEqual(reserved.label, ['8675', '309'])
    self.assertEqual(u'§ 8675.309 [Reserved]', reserved.title)
    self.assertEqual([], reserved.children)
def process_amendments(notice, notice_xml):
    """Process the changes to the regulation that are expressed in the
    notice.

    Every AMDPAR in `notice_xml` is grouped by its parent element and each
    group is processed as a batch. When at least one amendment is found,
    `notice` is mutated in place: `notice['amendments']` receives the
    amendments and `notice['changes']` the accumulated changes. Nothing is
    returned.
    """
    amends = []
    notice_changes = changes.NoticeChanges()

    amdpars_by_parent = []
    for par in notice_xml.xpath('//AMDPAR'):
        parent = par.getparent()
        # Build a real list: on Python 3 `filter` returns a lazy iterator,
        # which is always truthy and cannot be indexed.
        exists = [aXp for aXp in amdpars_by_parent if aXp.parent == parent]
        if exists:
            exists[0].append(par)
        else:
            amdpars_by_parent.append(AmdparByParent(parent, par))

    for aXp in amdpars_by_parent:
        amended_labels = []
        designate_labels, other_labels = [], []
        context = [aXp.parent.get('PART') or notice['cfr_part']]
        for par in aXp.amdpars:
            als, context = parse_amdpar(par, context)
            amended_labels.extend(als)

        # NOTE(review): `par` below is whatever the loop above left behind,
        # i.e. the last AMDPAR of this group -- confirm that is intended.
        for al in amended_labels:
            if isinstance(al, DesignateAmendment):
                subpart_changes = process_designate_subpart(al)
                if subpart_changes:
                    notice_changes.update(subpart_changes)
                designate_labels.append(al)
            elif new_subpart_added(al, notice['cfr_part']):
                notice_changes.update(process_new_subpart(notice, al, par))
                designate_labels.append(al)
            else:
                other_labels.append(al)

        create_xmlless_changes(other_labels, notice_changes)

        section_xml = find_section(par)
        if section_xml is not None:
            for section in reg_text.build_from_section(
                    notice['cfr_part'], section_xml):
                create_xml_changes(other_labels, section, notice_changes)

        for appendix in parse_appendix_changes(other_labels,
                                               notice['cfr_part'],
                                               aXp.parent):
            create_xml_changes(other_labels, appendix, notice_changes)

        interp = parse_interp_changes(other_labels, notice['cfr_part'],
                                      aXp.parent)
        if interp:
            create_xml_changes(other_labels, interp, notice_changes)

        amends.extend(designate_labels)
        amends.extend(other_labels)

    if amends:
        notice['amendments'] = amends
        notice['changes'] = notice_changes.changes
def test_build_from_section_double_collapsed(self):
    """Two markers collapsed after a keyterm should nest: a -> 1 -> i, ii."""
    with self.section() as builder:
        builder.P(_xml=u'(a) <E T="03">Keyterm</E>—(1)(i) Content')
        builder.P("(ii) Content2")
    built = reg_text.build_from_section('8675', self.tree.render_xml())
    section = self.node_accessor(built[0], ['8675', '309'])
    self.assertEqual(['a'], section.child_labels)
    par_a = section['a']
    self.assertEqual(['1'], par_a.child_labels)
    self.assertEqual(['i', 'ii'], par_a['1'].child_labels)
def test_build_from_section_bad_spaces(self):
    """Missing spaces around an emphasized keyterm should be restored."""
    with self.section(section=16) as builder:
        builder.STARS()
        builder.P(_xml='(b)<E T="03">General.</E>Content Content.')
    built = reg_text.build_from_section('8675', self.tree.render_xml())
    section = self.node_accessor(built[0], ['8675', '16'])
    self.assertEqual(['b'], section.child_labels)
    par_b_text = section['b'].text.strip()
    self.assertEqual(par_b_text, "(b) General. Content Content.")
def process_amendments(notice, notice_xml):
    """Process changes to the regulation that are expressed in the notice.

    Amendments are read from the <EREGS_INSTRUCTIONS> child of each AMDPAR
    and processed in batches, one batch per element that contains AMDPARs.
    When any amendment is found, `notice['amendments']` and
    `notice['changes']` are set. The (mutated) `notice` is returned.
    """
    all_amends = []     # will be added to the notice
    cfr_part = notice['cfr_parts'][0]
    notice_changes = changes.NoticeChanges()

    # process amendments in batches, based on their parent XML
    for amdparent in notice_xml.xpath('//AMDPAR/..'):
        context = [amdparent.get('PART') or cfr_part]
        amendments_by_section = defaultdict(list)
        normal_amends = []  # amendments not moving or adding a subpart
        for amdpar in amdparent.xpath('.//AMDPAR'):
            instructions = amdpar.xpath('./EREGS_INSTRUCTIONS')
            if not instructions:
                logger.warning('No <EREGS_INSTRUCTIONS>. Was this notice '
                               'preprocessed?')
                continue
            instructions = instructions[0]
            amendments = [amendment_from_xml(el) for el in instructions]
            # '?' marks an unknown context component. Compare by value:
            # `is` on a str literal only works by accident of interning.
            context = [
                None if part == '?' else part
                for part in instructions.get('final_context').split('-')]
            section_xml = find_section(amdpar)
            for amendment in amendments:
                all_amends.append(amendment)
                if isinstance(amendment, DesignateAmendment):
                    subpart_changes = process_designate_subpart(amendment)
                    if subpart_changes:
                        notice_changes.update(subpart_changes)
                elif new_subpart_added(amendment):
                    notice_changes.update(process_new_subpart(
                        notice, amendment, amdpar))
                elif section_xml is None:
                    normal_amends.append(amendment)
                else:
                    normal_amends.append(amendment)
                    amendments_by_section[section_xml].append(amendment)

        cfr_part = context[0]   # carry the part through to the next amdparent
        create_xmlless_changes(normal_amends, notice_changes)
        # Process amendments relating to a specific section in batches, too
        for section_xml, related_amends in amendments_by_section.items():
            for section in reg_text.build_from_section(cfr_part, section_xml):
                create_xml_changes(related_amends, section, notice_changes)

        for appendix in parse_appendix_changes(normal_amends, cfr_part,
                                               amdparent):
            create_xml_changes(normal_amends, appendix, notice_changes)

        interp = parse_interp_changes(normal_amends, cfr_part, amdparent)
        if interp:
            create_xml_changes(normal_amends, interp, notice_changes)

    if all_amends:
        notice['amendments'] = all_amends
        notice['changes'] = notice_changes.changes

    return notice
def _setup_for_ambiguous(self, final_par):
    """Build a section whose "(i)" paragraph is ambiguous, append
    `final_par` as the last paragraph, and return the parsed section
    wrapped in a NodeAccessor."""
    leading_paragraphs = (
        "(g) Some Content",
        "(h) H Starts",
        "(1) H-1",
        "(2) H-2",
        "(i) Is this 8675-309-h-2-i or 8675-309-i",
    )
    with self.section() as builder:
        for text in leading_paragraphs:
            builder.P(text)
        builder.P(final_par)
    built = reg_text.build_from_section('8675', builder.xml)
    return NodeAccessor(built[0])
def _setup_for_ambiguous(self, final_par):
    """Build a section whose "(i)" paragraph is ambiguous, append
    `final_par` as the last paragraph, and return an accessor for the
    parsed section."""
    leading_paragraphs = (
        "(g) Some Content",
        "(h) H Starts",
        "(1) H-1",
        "(2) H-2",
        "(i) Is this 8675-309-h-2-i or 8675-309-i",
    )
    with self.section() as builder:
        for text in leading_paragraphs:
            builder.P(text)
        builder.P(final_par)
    built = reg_text.build_from_section('8675', self.tree.render_xml())
    return self.node_accessor(built[0], ['8675', '309'])
def test_build_from_section_reserved(self):
    """A reserved section should produce a titled node with no children."""
    section_xml = etree.fromstring(u"""
        <SECTION>
            <SECTNO>§ 8675.309</SECTNO>
            <RESERVED>[Reserved]</RESERVED>
        </SECTION>""")
    reserved = reg_text.build_from_section('8675', section_xml)[0]
    self.assertEqual(reserved.label, ['8675', '309'])
    self.assertEqual(u'§ 8675.309 [Reserved]', reserved.title)
    self.assertEqual([], reserved.children)
def test_build_from_section_double_collapsed(self):
    """Two markers collapsed after a keyterm should nest: a -> 1 -> i, ii."""
    with self.section() as builder:
        builder.child_from_string(
            u'<P>(a) <E T="03">Keyterm</E>—(1)(i) Content</P>')
        builder.P("(ii) Content2")
    built = reg_text.build_from_section('8675', builder.xml)
    section = NodeAccessor(built[0])
    self.assertEqual(['a'], section.child_labels)
    par_a = section['a']
    self.assertEqual(['1'], par_a.child_labels)
    self.assertEqual(['i', 'ii'], par_a['1'].child_labels)
def test_build_from_section_double_alpha(self):
    """Ensure we match a hierarchy like (x), (y), (z), (aa), (bb)…"""
    with XMLBuilder("SECTION") as builder:
        builder.SECTNO(u"§ 8675.309")
        builder.SUBJECT("Definitions.")
        builder.P("(aa) This is what things mean:")
    section = reg_text.build_from_section('8675', builder.xml)[0]
    first_paragraph = section.children[0]
    self.assertEqual('(aa) This is what things mean:',
                     first_paragraph.text.strip())
    self.assertEqual(['8675', '309', 'aa'], first_paragraph.label)
def test_build_from_section_ambiguous(self):
    """The paragraph after an ambiguous "(i)" should decide where that
    "(i)" is placed in the hierarchy."""
    template = u"""
        <SECTION>
            <SECTNO>§ 8675.309</SECTNO>
            <SUBJECT>Definitions.</SUBJECT>
            <P>(g) Some Content</P>
            <P>(h) H Starts</P>
            <P>(1) H-1</P>
            <P>(2) H-2</P>
            <P>(i) Is this 8675-309-h-2-i or 8675-309-i</P>
            <P>%s</P>
        </SECTION>
    """

    def build(final_paragraph):
        # Parse the template with the given text as its last paragraph
        section_xml = etree.fromstring(template % final_paragraph)
        return reg_text.build_from_section('8675', section_xml)[0]

    # Followed by "(ii)": the "(i)" is a roman numeral under h-2
    section = build('(ii) A')
    par_h = section.children[1]
    par_h_2 = par_h.children[1]
    self.assertEqual(2, len(section.children))
    self.assertEqual(2, len(par_h.children))
    self.assertEqual(2, len(par_h_2.children))

    # Followed by "(A)": the "(A)" nests under h-2-i
    section = build('(A) B')
    par_h = section.children[1]
    par_h_2 = par_h.children[1]
    par_h_2_i = par_h_2.children[0]
    self.assertEqual(2, len(section.children))
    self.assertEqual(2, len(par_h.children))
    self.assertEqual(1, len(par_h_2.children))
    self.assertEqual(1, len(par_h_2_i.children))

    # Followed by "(1)": the section gains a third top-level child
    section = build('(1) C')
    self.assertEqual(3, len(section.children))

    # Followed by "(3)": h gains a third child
    section = build('(3) D')
    par_h = section.children[1]
    par_h_2 = par_h.children[1]
    self.assertEqual(2, len(section.children))
    self.assertEqual(3, len(par_h.children))
    self.assertEqual(1, len(par_h_2.children))
def test_build_from_section_reserved_range(self):
    """A three-section reserved range should expand to one node per
    section."""
    with XMLBuilder("SECTION") as builder:
        builder.SECTNO(u"§§ 8675.309-8675.311")
        builder.RESERVED("[Reserved]")
    # Unpacking requires exactly three nodes
    n309, n310, n311 = reg_text.build_from_section('8675', builder.xml)
    numbered = [(n309, '309'), (n310, '310'), (n311, '311')]
    for node, number in numbered:
        self.assertEqual(node.label, ['8675', number])
    for node, number in numbered:
        self.assertEqual(u'§ 8675.' + number + u' [Reserved]', node.title)
def test_build_from_section_intro_text(self):
    """An unmarked leading paragraph should become the section's text."""
    with self.section() as builder:
        builder.P("Some content about this section.")
        builder.P("(a) something something")
    built = reg_text.build_from_section('8675', self.tree.render_xml())
    section = self.node_accessor(built[0], ['8675', '309'])
    self.assertEqual('Some content about this section.',
                     section.text.strip())
    self.assertEqual(['a'], section.child_labels)
    self.assertEqual('(a) something something', section['a'].text.strip())
    self.assertEqual([], section['a'].children)
def test_build_from_section_bad_spaces(self):
    """Missing spaces around an emphasized keyterm should be restored."""
    with self.section(section=16) as builder:
        builder.STARS()
        builder.child_from_string(
            '<P>(b)<E T="03">General.</E>Content Content.</P>')
    built = reg_text.build_from_section('8675', builder.xml)
    section = NodeAccessor(built[0])
    self.assertEqual(['8675', '16'], section.label)
    self.assertEqual(['b'], section.child_labels)
    par_b_text = section['b'].text.strip()
    self.assertEqual(par_b_text, "(b) General. Content Content.")
def test_build_from_section_intro_text(self):
    """An unmarked leading paragraph should become the section's text."""
    with self.section() as builder:
        builder.P("Some content about this section.")
        builder.P("(a) something something")
    built = reg_text.build_from_section('8675', builder.xml)
    section = NodeAccessor(built[0])
    self.assertEqual('Some content about this section.',
                     section.text.strip())
    self.assertEqual(['a'], section.child_labels)
    self.assertEqual('(a) something something', section['a'].text.strip())
    self.assertEqual([], section['a'].children)
def test_build_from_3_section_reserved_range(self):
    """A three-section reserved range should expand to one node per
    section."""
    with XMLBuilder("SECTION") as builder:
        builder.SECTNO(u"§§ 8675.309-8675.311")
        builder.RESERVED("[Reserved]")
    # Unpacking requires exactly three nodes
    n309, n310, n311 = reg_text.build_from_section('8675', builder.xml)
    numbered = [(n309, '309'), (n310, '310'), (n311, '311')]
    for node, number in numbered:
        self.assertEqual(node.label, ['8675', number])
    for node, number in numbered:
        self.assertEqual(u'§ 8675.' + number + u' [Reserved]', node.title)
def test_build_from_section_collapsed(self):
    """Markers collapsed into one paragraph should become nested levels."""
    with self.section() as builder:
        builder.P("(a) aaa")
        builder.P("(1) 111")
        builder.P(_xml=u'(2) 222—(i) iii. (A) AAA')
        builder.P("(B) BBB")
    built = reg_text.build_from_section('8675', self.tree.render_xml())
    section = self.node_accessor(built[0], ['8675', '309'])
    self.assertEqual(['a'], section.child_labels)
    par_a = section['a']
    self.assertEqual(['1', '2'], par_a.child_labels)
    par_a_2 = par_a['2']
    self.assertEqual(['i'], par_a_2.child_labels)
    self.assertEqual(['A', 'B'], par_a_2['i'].child_labels)
def test_build_from_section_collapsed_level(self):
    """A marker collapsed behind a keyterm should start a child level."""
    with self.section() as builder:
        builder.P(_xml=(u'(a) <E T="03">Transfers </E>—(1) '
                        u'<E T="03">Notice.</E> follow'))
        builder.P("(2) More text")
        builder.P(_xml='(b) <E T="03">Contents</E> (1) Here')
        builder.P("(2) More text")
    built = reg_text.build_from_section('8675', self.tree.render_xml())
    section = self.node_accessor(built[0], ['8675', '309'])
    self.assertEqual(['a', 'b'], section.child_labels)
    for marker in ('a', 'b'):
        self.assertEqual(['1', '2'], section[marker].child_labels)
def test_build_from_section_whitespace(self):
    """The whitespace in the section text (and intro paragraph) should get
    removed"""
    with self.tree.builder("SECTION", node_value="\n\n") as builder:
        builder.SECTNO(u"§ 8675.309")
        builder.SUBJECT("subsubsub")
        builder.P(" Some \n content\n")
        for marked_paragraph in ("(a) aaa", "(b) bbb"):
            builder.P(marked_paragraph)
    built = reg_text.build_from_section('8675', self.tree.render_xml())
    section = built[0]
    # Only the outer whitespace goes; the inner newline is kept
    self.assertEqual(section.text, "Some \n content")
def test_build_from_section_reserved_range(self):
    """A three-section reserved range should expand to one node per
    section."""
    with self.tree.builder("SECTION") as builder:
        builder.SECTNO(u"§§ 8675.309-8675.311")
        builder.RESERVED("[Reserved]")
    # Unpacking requires exactly three nodes
    n309, n310, n311 = reg_text.build_from_section(
        '8675', self.tree.render_xml())
    numbered = [(n309, '309'), (n310, '310'), (n311, '311')]
    for node, number in numbered:
        self.assertEqual(node.label, ['8675', number])
    for node, number in numbered:
        self.assertEqual(u'§ 8675.' + number + u' [Reserved]', node.title)
def test_build_from_section_collapsed(self):
    """Markers collapsed into one paragraph should become nested levels."""
    with self.section() as builder:
        builder.P("(a) aaa")
        builder.P("(1) 111")
        builder.child_from_string(u'<P>(2) 222—(i) iii. (A) AAA</P>')
        builder.P("(B) BBB")
    built = reg_text.build_from_section('8675', builder.xml)
    section = NodeAccessor(built[0])
    self.assertEqual(['a'], section.child_labels)
    par_a = section['a']
    self.assertEqual(['1', '2'], par_a.child_labels)
    par_a_2 = par_a['2']
    self.assertEqual(['i'], par_a_2.child_labels)
    self.assertEqual(['A', 'B'], par_a_2['i'].child_labels)
def test_build_from_section_whitespace(self):
    """The whitespace in the section text (and intro paragraph) should get
    removed"""
    with XMLBuilder("SECTION", "\n\n") as builder:
        builder.SECTNO(u"§ 8675.309")
        builder.SUBJECT("subsubsub")
        builder.P(" Some \n content\n")
        for marked_paragraph in ("(a) aaa", "(b) bbb"):
            builder.P(marked_paragraph)
    section = reg_text.build_from_section('8675', builder.xml)[0]
    # Only the outer whitespace goes; the inner newline is kept
    self.assertEqual(section.text, "Some \n content")
def find_diffs(xml_tree, cfr_part):
    """Find the XML nodes that are needed to determine diffs"""
    # NOTE(review): nothing is collected or returned here; the walk only
    # calls node_is_empty on the visited nodes -- confirm whether this
    # function is still work in progress.

    def descend_while_empty(node):
        # Only an empty node has its children visited
        if not node_is_empty(node):
            return
        for child in node.children:
            descend_while_empty(child)

    # Only final notices have this format
    for raw_section in xml_tree.xpath('//REGTEXT//SECTION'):
        unbracketed = clear_between(raw_section, '[', ']')
        without_open = remove_char(unbracketed, u'▸')
        cleaned = remove_char(without_open, u'◂')
        for root in build_from_section(cfr_part, cleaned):
            descend_while_empty(root)
def test_build_from_section_extract_with_table(self):
    """Account for regtext with a table in an extract"""
    subject = "Table of distances for storage of low explosives."
    railroad = "From public railroad and highway distance (feet)"
    template = """
        <SECTION>
            <SECTNO>§ 555.219</SECTNO>
            <SUBJECT>%s</SUBJECT>
            <EXTRACT>
                <GPOTABLE CDEF="6.1,6.1,5.2,5.2,5.2" COLS="5" OPTS="L2">
                    <BOXHD>
                        <CHED H="1">Pounds</CHED>
                        <CHED H="2">Over</CHED>
                        <CHED H="2">Not over</CHED>
                        <CHED H="1">From inhabited building distance (feet)</CHED>
                        <CHED H="1">%s</CHED>
                        <CHED H="1">From above ground magazine (feet)</CHED>
                    </BOXHD>
                    <ROW>
                        <ENT I="01">0</ENT>
                        <ENT>1,000</ENT>
                        <ENT>75</ENT>
                        <ENT>75</ENT>
                        <ENT>50</ENT>
                    </ROW>
                    <ROW>
                        <ENT I="01">1,000</ENT>
                        <ENT>5,000</ENT>
                        <ENT>115</ENT>
                        <ENT>115</ENT>
                        <ENT>75</ENT>
                    </ROW>
                </GPOTABLE>
            </EXTRACT>
        </SECTION>
    """
    section_xml = etree.fromstring(template % (subject, railroad))
    built = reg_text.build_from_section('555', section_xml)

    # Section node
    section = built[0]
    self.assertEqual(u'§ 555.219 %s' % subject, section.title)
    self.assertEqual('regtext', section.node_type)
    self.assertEqual(['555', '219'], section.label)
    self.assertEqual(1, len(section.children))

    # Its only child is the extract
    extract = section.children[0]
    self.assertEqual('extract', extract.node_type)
    self.assertEqual(1, len(extract.children))

    # The extract's only child is the table
    table = extract.children[0]
    self.assertEqual('regtext', table.node_type)
    self.assertEqual('GPOTABLE', table.source_xml.tag)
    self.assertTrue(table.tagged_text.startswith('<GPOTABLE'))
def test_build_from_section_double_alpha(self):
    """Ensure we match a hierarchy like (x), (y), (z), (aa), (bb)…"""
    section_xml = etree.fromstring(u"""
        <SECTION>
            <SECTNO>§ 8675.309</SECTNO>
            <SUBJECT>Definitions.</SUBJECT>
            <P>(aa) This is what things mean:</P>
        </SECTION>
    """)
    section = reg_text.build_from_section('8675', section_xml)[0]
    first_paragraph = section.children[0]
    self.assertEqual('(aa) This is what things mean:',
                     first_paragraph.text.strip())
    self.assertEqual(['8675', '309', 'aa'], first_paragraph.label)
def test_build_from_section_collapsed_level_emph(self):
    """An emphasized marker collapsed into "(A)" should become its child."""
    with self.section() as builder:
        for text in ("(a) aaaa", "(1) 1111", "(i) iiii"):
            builder.P(text)
        builder.child_from_string(
            u'<P>(A) AAA—(<E T="03">1</E>) eeee</P>')
        builder.STARS()
    built = reg_text.build_from_section('8675', builder.xml)
    section = NodeAccessor(built[0])
    par_a1iA = section['a']['1']['i']['A']
    self.assertEqual(u"(A) AAA—", par_a1iA.text)
    self.assertEqual(['1'], par_a1iA.child_labels)
    self.assertEqual("(1) eeee", par_a1iA['1'].text.strip())
def test_build_from_section_bad_spaces(self):
    """Missing spaces around an emphasized keyterm should be restored."""
    section_xml = etree.fromstring(u"""
        <SECTION>
            <SECTNO>§ 8675.16</SECTNO>
            <SUBJECT>Subby Sub Sub.</SUBJECT>
            <STARS/>
            <P>(b)<E T="03">General.</E>Content Content.</P>
        </SECTION>
    """)
    section = reg_text.build_from_section('8675', section_xml)[0]
    self.assertEqual(1, len(section.children))
    par_b = section.children[0]
    self.assertEqual(par_b.text.strip(), "(b) General. Content Content.")
def test_build_from_section_fp(self):
    """An FP element should become a "p1" child of the paragraph before
    it."""
    with self.section() as builder:
        builder.P("(a) aaa")
        builder.P("(b) bbb")
        builder.FP("fpfpfp")
        builder.P("(c) ccc")
    built = reg_text.build_from_section('8675', builder.xml)
    section = NodeAccessor(built[0])
    self.assertEqual(['a', 'b', 'c'], section.child_labels)
    self.assertEqual([], section['a'].child_labels)
    par_b = section['b']
    self.assertEqual(['p1'], par_b.child_labels)
    self.assertEqual([], par_b['p1'].child_labels)
    self.assertEqual([], section['c'].child_labels)
def test_build_from_section_collapsed_level(self):
    """A marker collapsed behind a keyterm should start a child level."""
    with self.section() as builder:
        builder.child_from_string(
            u'<P>(a) <E T="03">Transfers </E>—(1) '
            u'<E T="03">Notice.</E> follow</P>')
        builder.P("(2) More text")
        builder.child_from_string(
            '<P>(b) <E T="03">Contents</E> (1) Here</P>')
        builder.P("(2) More text")
    built = reg_text.build_from_section('8675', builder.xml)
    section = NodeAccessor(built[0])
    self.assertEqual(['a', 'b'], section.child_labels)
    for marker in ('a', 'b'):
        self.assertEqual(['1', '2'], section[marker].child_labels)
def test_build_from_section_notes(self):
    """Account for paragraphs within a NOTES tag"""
    with self.section() as builder:
        builder.P("(a) aaaa")
        with builder.NOTES():
            builder.PRTPAGE(P="8")
            builder.P("1. Some content")
            builder.P("2. Other content")
    built = reg_text.build_from_section('8675', builder.xml)
    section = NodeAccessor(built[0])
    self.assertEqual(['a'], section.child_labels)
    par_a = section['a']
    self.assertEqual(['p1'], par_a.child_labels)
    # The NOTES element is expected as a single note-typed child of (a)
    self.assertEqual(Node.NOTE, par_a['p1'].node_type)
    self.assertEqual(['1', '2'], par_a['p1'].child_labels)
def test_build_from_section_image(self):
    """We should process images (GPH/GID)"""
    with XMLBuilder("SECTION", "\n\n") as builder:
        builder.SECTNO(u"§ 8675.309")
        builder.SUBJECT("subsubsub")
        builder.P("(a) aaa")
        with builder.GPH():
            builder.GID("a-gid")
        builder.P("(b) bbb")
    built = reg_text.build_from_section('8675', builder.xml)
    section = NodeAccessor(built[0])
    self.assertEqual(['a', 'b'], section.child_labels)
    par_a = section['a']
    self.assertEqual(['p1'], par_a.child_labels)
    # The image is expected as a markdown image link built from the GID
    self.assertEqual('![](a-gid)', par_a['p1'].text)
def test_build_from_section_reserved_range(self):
    """A three-section reserved range should expand to one node per
    section."""
    section_xml = etree.fromstring(u"""
        <SECTION>
            <SECTNO>§§ 8675.309-8675.311</SECTNO>
            <RESERVED>[Reserved]</RESERVED>
        </SECTION>""")
    # Unpacking requires exactly three nodes
    n309, n310, n311 = reg_text.build_from_section('8675', section_xml)
    numbered = [(n309, '309'), (n310, '310'), (n311, '311')]
    for node, number in numbered:
        self.assertEqual(node.label, ['8675', number])
    for node, number in numbered:
        self.assertEqual(u'§ 8675.' + number + u' [Reserved]', node.title)
def test_build_from_section_italic_levels(self):
    """Emphasized numeric markers should nest below an "(A)" paragraph."""
    with self.section() as builder:
        for text in ("(a) aaa", "(1) 111", "(i) iii", "(A) AAA"):
            builder.P(text)
        builder.child_from_string('<P>(<E T="03">1</E>) i1i1i1</P>')
        builder.child_from_string('<P>\n(<E T="03">2</E>) i2i2i2</P>')
    built = reg_text.build_from_section('8675', builder.xml)

    # Walk down a -> 1 -> i -> A; each level has exactly that one child
    current = NodeAccessor(built[0])
    for marker in ('a', '1', 'i', 'A'):
        self.assertEqual([marker], current.child_labels)
        current = current[marker]
    self.assertEqual(['1', '2'], current.child_labels)
def test_build_from_section_collapsed_level_emph(self):
    """An emphasized marker collapsed into "(A)" should become its child."""
    section_xml = etree.fromstring(u"""
        <SECTION>
            <SECTNO>§ 8675.309</SECTNO>
            <SUBJECT>Definitions.</SUBJECT>
            <P>(a) aaaa</P>
            <P>(1) 1111</P>
            <P>(i) iiii</P>
            <P>(A) AAA—(<E T="03">1</E>) eeee</P>
        </SECTION>
    """)
    # Follow the first child four levels down: a -> 1 -> i -> A
    par_a1iA = reg_text.build_from_section('8675', section_xml)[0]
    for _ in range(4):
        par_a1iA = par_a1iA.children[0]
    self.assertEqual(u"(A) AAA—", par_a1iA.text)
    self.assertEqual(1, len(par_a1iA.children))
    self.assertEqual("(1) eeee", par_a1iA.children[0].text.strip())
def test_build_from_section_table(self):
    """Account for regtext with a table"""
    with self.section() as builder:
        builder.P("(a) aaaa")
        with builder.GPOTABLE(CDEF="s25,10", COLS=2, OPTS="L2,i1"):
            with builder.BOXHD():
                builder.CHED(H=1)
                builder.CHED("Header", H=1)
            with builder.ROW():
                builder.ENT("Left content", I="01")
                builder.ENT("Right content")
    built = reg_text.build_from_section('8675', builder.xml)
    section = NodeAccessor(built[0])
    self.assertEqual(['a'], section.child_labels)
    par_a = section['a']
    self.assertEqual(['p1'], par_a.child_labels)
    # The table text is expected as a markdown table
    expected_markdown = "||Header|\n|---|---|\n|Left content|Right content|"
    self.assertEqual(expected_markdown, par_a['p1'].text)
    self.assertEqual("GPOTABLE", par_a['p1'].source_xml.tag)
def test_build_from_section_example(self):
    """Account for paragraphs within an EXAMPLE tag"""
    with self.section() as builder:
        builder.P("(a) aaaa")
        with builder.EXAMPLE():
            builder.P("You need a form if:")
            builder.P("1. Some content")
            builder.P("2. Other content")
        with builder.EXAMPLE():
            builder.P("You do not need a form if:")
            builder.P("1. Some content")
            builder.P("2. Other content")
    section = reg_text.build_from_section('8675', builder.xml)[0]

    par_a = section.children[0]
    self.assertEqual(u'(a) aaaa', par_a.text)
    self.assertEqual(2, len(par_a.children))
    self.assertEqual(['8675', '309', 'a'], par_a.label)

    # (label marker of the example, text of its first paragraph)
    expected_examples = [
        ('p1', u'You need a form if:'),
        ('p2', u'You do not need a form if:'),
    ]
    for example, (marker, intro) in zip(par_a.children, expected_examples):
        self.assertEqual(u'', example.text)
        self.assertEqual(3, len(example.children))
        self.assertEqual(['8675', '309', 'a', marker], example.label)

        # Paragraphs inside an example are expected as p1, p2, p3
        texts = [intro, u'1. Some content', u'2. Other content']
        numbered = enumerate(zip(example.children, texts), 1)
        for position, (paragraph, text) in numbered:
            self.assertEqual(text, paragraph.text)
            self.assertEqual(
                ['8675', '309', 'a', marker, 'p%d' % position],
                paragraph.label)
def test_manual_hierarchy(self):
    """A depth list in reg_text.PARAGRAPH_HIERARCHY should dictate how the
    section's paragraphs nest.

    The override is written to module-level state, so it is undone once
    the section has been built; otherwise it would leak into other tests.
    """
    xml = u"""
        <SECTION>
            <SECTNO>§ 1234.567</SECTNO>
            <SUBJECT>Definitions.</SUBJECT>
            <P>(a) Some terms are defined in statute</P>
            <P>(b) Other terms are:</P>
            <P><E T="03">Term one</E> means the first term.</P>
            <P><E T="03">Term two</E> means:</P>
            <P>(1)(i) The term that follows one</P>
            <P>(ii) The second term</P>
            <P>(2) A term we've arbitrarily labeled two</P>
            <P><E T="03">Term three</E> means the third term.</P>
        </SECTION>
    """
    hierarchy = reg_text.PARAGRAPH_HIERARCHY
    # Remember what was configured for this part so it can be put back
    had_override = '1234' in hierarchy
    previous = hierarchy['1234'] if had_override else None
    hierarchy['1234'] = {
        '1234.567': [int(num) for num in """
            1 1 2 2 3 4 4 3 2""".split()]}
    try:
        node = reg_text.build_from_section('1234', etree.fromstring(xml))[0]
    finally:
        if had_override:
            hierarchy['1234'] = previous
        else:
            del hierarchy['1234']

    self.assertEqual(2, len(node.children))
    defs_b = node.children[1]
    self.assertEqual(3, len(defs_b.children))
    # term one
    self.assertEqual(0, len(defs_b.children[0].children))
    # term two
    term_two = defs_b.children[1]
    self.assertEqual(2, len(term_two.children))
    self.assertEqual(2, len(term_two.children[0].children))
    self.assertEqual(0, len(term_two.children[1].children))
    # term three
    self.assertEqual(0, len(defs_b.children[2].children))
def test_build_from_section_collapsed(self):
    """Markers collapsed into one paragraph should become nested levels."""
    section_xml = etree.fromstring(u"""
        <SECTION>
            <SECTNO>§ 8675.309</SECTNO>
            <SUBJECT>Definitions.</SUBJECT>
            <P>(a) aaa</P>
            <P>(1) 111</P>
            <P>(2) 222—(i) iii. (A) AAA</P>
            <P>(B) BBB</P>
        </SECTION>
    """)
    section = reg_text.build_from_section('8675', section_xml)[0]
    self.assertEqual(1, len(section.children))
    par_a = section.children[0]
    self.assertEqual(2, len(par_a.children))
    par_a_2 = par_a.children[1]
    self.assertEqual(1, len(par_a_2.children))
    par_a_2_i = par_a_2.children[0]
    self.assertEqual(2, len(par_a_2_i.children))