def test_build_supplement_tree_appendix_paragraphs(self):
    """Appendix headers keyed by paragraph markers nest under one another:
    the "(b)(5)" header is expected as a child of "(b)", next to the
    numbered paragraph that belongs to "(b)" itself."""
    xml = (
        '<APPENDIX> '
        '<HD SOURCE="HED"> Supplement I to Part 737-Official '
        'Interpretations</HD> '
        '<HD SOURCE="HD2">Appendix H</HD> '
        '<HD SOURCE="HD3">(b) bbbbbbb</HD> '
        '<P>1. Paragraph b</P> '
        '<HD SOURCE="HD3">(b)(5) b5b5b5</HD> '
        '<P>1. Paragraph b5</P> '
        '</APPENDIX>'
    )
    appendix_xml = etree.fromstring(xml)
    root = interpretations.build_supplement_tree('737', appendix_xml)
    self.assertEqual(['737', 'Interp'], root.label)
    self.assertEqual(1, len(root.children))

    # Single top-level node: the interpretation of Appendix H
    app_h = root.children[0]
    self.assertEqual(['737', 'H', 'Interp'], app_h.label)
    self.assertEqual(1, len(app_h.children))

    # ... which holds only the "(b)" header
    app_h_b = app_h.children[0]
    self.assertEqual(['737', 'H', 'b', 'Interp'], app_h_b.label)
    self.assertEqual(2, len(app_h_b.children))

    # "(b)" owns its numbered paragraph and the deeper "(b)(5)" header
    para_1, app_h_b5 = app_h_b.children
    self.assertEqual(['737', 'H', 'b', 'Interp', '1'], para_1.label)
    self.assertEqual(['737', 'H', 'b', '5', 'Interp'], app_h_b5.label)
def test_build_supplement_intro_section(self):
    """Integration test: an "Introduction" header ahead of the first section
    gets its own 'h1' node, and the paragraphs below it are kept together
    as one child rather than being split on their "(a)"/"(b)" markers."""
    xml = (
        '<APPENDIX> '
        '<HD SOURCE="HED"> Supplement I to Part 737-Official '
        'Interpretations</HD> '
        '<HD SOURCE="HD1">Introduction</HD> '
        '<P>1. Some content. (a) Badly named</P> '
        '<P>(b) Badly named</P> '
        '<HD SOURCE="HD1">Subpart A</HD> '
        '<HD SOURCE="HD2">Section 737.13</HD> '
        '<P><E>13(a) Some Stuff!</E></P> '
        '<P>1. 131313</P> '
        '<HD SOURCE="HD2">Appendix G</HD> '
        '<P>1. G</P> '
        '</APPENDIX>'
    )
    appendix_xml = etree.fromstring(xml)
    root = interpretations.build_supplement_tree('737', appendix_xml)
    self.assertEqual(['737', 'Interp'], root.label)
    self.assertEqual(3, len(root.children))

    intro, section_13, appendix_g = root.children
    self.assertEqual(['737', 'Interp', 'h1'], intro.label)
    self.assertEqual(['737', '13', 'Interp'], section_13.label)
    self.assertEqual(['737', 'G', 'Interp'], appendix_g.label)

    # Both intro paragraphs end up in a single, childless node
    self.assertEqual(len(intro.children), 1)
    intro_body = intro.children[0]
    expected_text = (
        '1. Some content. (a) Badly named\n\n'
        '(b) Badly named'
    )
    self.assertEqual(expected_text, intro_body.text.strip())
    self.assertEqual(len(intro_body.children), 0)

    self.assertEqual(1, len(section_13.children))
    keyterm_node = section_13.children[0]
    self.assertEqual('13(a) Some Stuff!', keyterm_node.title)
def test_build_supplement_tree_spacing(self):
    """Integration test: inline tags that touch the surrounding words
    (<E>, <PRTPAGE />) must come out with single spaces in the node text."""
    xml = (
        '<APPENDIX> '
        '<HD SOURCE="HED"> Supplement I to Part 737-Official '
        'Interpretations</HD> '
        '<HD SOURCE="HD2">Section 737.5 NASCAR</HD> '
        '<P>1.<E T="03">Phrase</E>. More Content</P> '
        '<P>i. I like<PRTPAGE />ice cream</P> '
        '<P>A. Aaaaah</P> '
        '<P><E T="03">1.</E>More info</P> '
        '</APPENDIX>'
    )
    appendix_xml = etree.fromstring(xml)
    root = interpretations.build_supplement_tree('737', appendix_xml)
    self.assertEqual(['737', 'Interp'], root.label)
    self.assertEqual(1, len(root.children))

    # Start at the section node and walk down the single-child chain
    node = root.children[0]
    self.assertEqual(1, len(node.children))

    chain_texts = (
        "1. Phrase. More Content",
        "i. I like ice cream",
        "A. Aaaaah",
    )
    for expected_text in chain_texts:
        node = node.children[0]
        self.assertEqual(node.text.strip(), expected_text)
        self.assertEqual(1, len(node.children))

    # Deepest paragraph: has text but no further children
    leaf = node.children[0]
    self.assertEqual(leaf.text.strip(), "1. More info")
    self.assertEqual(0, len(leaf.children))
def test_build_supplement_tree_skip_levels(self):
    """A header that jumps straight to "5(a)(1)(i)" must still produce the
    intermediate 5(a) and 5(a)(1) nodes, while the later "5(b)" header
    becomes a sibling of 5(a)."""
    xml = (
        '<APPENDIX> '
        '<HD SOURCE="HED"> Supplement I to Part 737-Official '
        'Interpretations</HD> '
        '<HD SOURCE="HD2">Section 737.5 NASCAR</HD> '
        '<HD SOURCE="HD2">5(a)(1)(i) Access Device</HD> '
        '<P>1. Paragraph 111</P> '
        '<HD SOURCE="HD2">5(b) Other Devices</HD> '
        '<P>1. Paragraph 222</P> '
        '</APPENDIX>'
    )
    appendix_xml = etree.fromstring(xml)
    root = interpretations.build_supplement_tree('737', appendix_xml)
    self.assertEqual(['737', 'Interp'], root.label)
    self.assertEqual(1, len(root.children))

    section = root.children[0]
    self.assertEqual(['737', '5', 'Interp'], section.label)
    self.assertEqual(2, len(section.children))
    sub_a, sub_b = section.children

    # 5(a) -> 5(a)(1) -> 5(a)(1)(i), one child at every step
    self.assertEqual(['737', '5', 'a', 'Interp'], sub_a.label)
    self.assertEqual(1, len(sub_a.children))

    sub_a_1 = sub_a.children[0]
    self.assertEqual(['737', '5', 'a', '1', 'Interp'], sub_a_1.label)
    self.assertEqual(1, len(sub_a_1.children))

    sub_a_1_i = sub_a_1.children[0]
    self.assertEqual(['737', '5', 'a', '1', 'i', 'Interp'],
                     sub_a_1_i.label)
    self.assertEqual(1, len(sub_a_1_i.children))

    # 5(b) sits next to 5(a) and holds its own paragraph
    self.assertEqual(['737', '5', 'b', 'Interp'], sub_b.label)
    self.assertEqual(1, len(sub_b.children))
def build_non_reg_text(reg_xml, reg_part):
    """Build the trees for the non-regulation text, such as the Appendices
    and the Supplement section.

    :param reg_xml: the regulation, either as a string of XML (which is
        parsed with ``etree.fromstring``) or as an already-parsed node
        exposing ``xpath``.
    :param reg_part: handed through unchanged to ``build_supplement_tree``
        and ``process_appendix``.
    :return: a list with one tree per ``APPENDIX`` element found beneath a
        ``PART`` element, in the order ``xpath`` yields them; an empty list
        when there are none.
    """
    # ``unicode`` only exists on Python 2. Referring to it unconditionally
    # raises NameError on Python 3 as soon as a non-``str`` (i.e. an
    # already-parsed tree) is passed in, so resolve the text types safely.
    try:
        text_types = (str, unicode)
    except NameError:
        text_types = (str,)

    if isinstance(reg_xml, text_types):
        doc_root = etree.fromstring(reg_xml)
    else:
        doc_root = reg_xml

    children = []
    for non_reg_sect in doc_root.xpath('//PART//APPENDIX'):
        section_title = get_app_title(non_reg_sect)
        # A title mentioning both words marks the interpretations
        # supplement; every other APPENDIX is treated as a plain appendix.
        if 'Supplement' in section_title and 'Part' in section_title:
            children.append(build_supplement_tree(reg_part, non_reg_sect))
        else:
            children.append(process_appendix(non_reg_sect, reg_part))
    return children
def build_non_reg_text(reg_xml, reg_part):
    """Build the trees for the non-regulation text, such as the Appendices
    and the Supplement section.

    :param reg_xml: the regulation, either as a string of XML (which is
        parsed with ``etree.fromstring``) or as an already-parsed node
        exposing ``xpath``.
    :param reg_part: handed through unchanged to ``build_supplement_tree``
        and ``process_appendix``.
    :return: a list with one tree per ``APPENDIX`` element found beneath a
        ``PART`` element, in the order ``xpath`` yields them; an empty list
        when there are none.
    """
    # ``unicode`` only exists on Python 2. Referring to it unconditionally
    # raises NameError on Python 3 as soon as a non-``str`` (i.e. an
    # already-parsed tree) is passed in, so resolve the text types safely.
    try:
        text_types = (str, unicode)
    except NameError:
        text_types = (str,)

    if isinstance(reg_xml, text_types):
        doc_root = etree.fromstring(reg_xml)
    else:
        doc_root = reg_xml

    children = []
    for non_reg_sect in doc_root.xpath('//PART//APPENDIX'):
        section_title = get_app_title(non_reg_sect)
        # A title mentioning both words marks the interpretations
        # supplement; every other APPENDIX is treated as a plain appendix.
        if 'Supplement' in section_title and 'Part' in section_title:
            children.append(build_supplement_tree(reg_part, non_reg_sect))
        else:
            children.append(process_appendix(non_reg_sect, reg_part))
    return children
def build_non_reg_text(reg_xml, reg_part):
    """Assemble the trees for everything that is not regulation text proper,
    i.e. the Appendices and the Supplement section.

    ``reg_xml`` may be a string of XML (parsed here) or an already-parsed
    node; ``reg_part`` is forwarded to the per-section builders. Returns a
    list with one tree per ``APPENDIX`` element found under a ``PART``.
    """
    needs_parsing = isinstance(reg_xml, six.string_types)
    doc_root = etree.fromstring(reg_xml) if needs_parsing else reg_xml

    non_reg_sects = doc_root.xpath('//PART//APPENDIX')
    logger.debug("Non Reg sections: %r", non_reg_sects)

    def _build_tree(appendix_xml):
        # Route one APPENDIX element to the matching builder
        title = get_app_title(appendix_xml)
        logger.debug("Building non reg sect: %s", title)
        if 'Supplement' in title and 'Part' in title:
            return build_supplement_tree(reg_part, appendix_xml)
        return process_appendix(appendix_xml, reg_part)

    return [_build_tree(appendix_xml) for appendix_xml in non_reg_sects]
def build_non_reg_text(reg_xml, reg_part):
    """Assemble the trees for everything that is not regulation text proper,
    i.e. the Appendices and the Supplement section.

    ``reg_xml`` may be a string of XML (parsed here) or an already-parsed
    node; ``reg_part`` is forwarded to the per-section builders. Returns a
    list with one tree per ``APPENDIX`` element found under a ``PART``.
    """
    needs_parsing = isinstance(reg_xml, six.string_types)
    doc_root = etree.fromstring(reg_xml) if needs_parsing else reg_xml

    non_reg_sects = doc_root.xpath('//PART//APPENDIX')
    logger.debug("Non Reg sections: %r", non_reg_sects)

    def _build_tree(appendix_xml):
        # Route one APPENDIX element to the matching builder
        title = get_app_title(appendix_xml)
        logger.debug("Building non reg sect: %s", title)
        if 'Supplement' in title and 'Part' in title:
            return build_supplement_tree(reg_part, appendix_xml)
        return process_appendix(appendix_xml, reg_part)

    return [_build_tree(appendix_xml) for appendix_xml in non_reg_sects]
def test_build_supplement_tree_repeats(self):
    """Integration test: a header covering several appendices at once
    ("Appendices G and H") gets its own combined 'G_H' node and does not
    collide with the separate "Appendix G" / "Appendix H" nodes."""
    xml = (
        '<APPENDIX> '
        '<HD SOURCE="HED"> Supplement I to Part 737-Official '
        'Interpretations</HD> '
        '<HD SOURCE="HD2">Appendices G and H-Content</HD> '
        '<P>1. G:H</P> '
        '<HD SOURCE="HD2">Appendix G</HD> '
        '<P>1. G</P> '
        '<HD SOURCE="HD2">Appendix H</HD> '
        '<P>1. H</P> '
        '</APPENDIX>'
    )
    appendix_xml = etree.fromstring(xml)
    root = interpretations.build_supplement_tree('737', appendix_xml)
    self.assertEqual(['737', 'Interp'], root.label)
    self.assertEqual(3, len(root.children))

    combined, only_g, only_h = root.children
    self.assertEqual(['737', 'G_H', 'Interp'], combined.label)
    self.assertEqual(['737', 'G', 'Interp'], only_g.label)
    self.assertEqual(['737', 'H', 'Interp'], only_h.label)
def test_build_supplement_tree(self):
    """Integration test: checks the labels, child counts and (for the
    deepest paragraphs) tagged text of the full tree built from a section
    header, a "5(a)" sub-header and several levels of nested paragraphs."""
    xml = (
        '<APPENDIX> '
        '<HD SOURCE="HED"> Supplement I to Part 737-Official '
        'Interpretations</HD> '
        '<HD SOURCE="HD2">Section 737.5 NASCAR</HD> '
        '<P>1. Paragraph 1</P> '
        '<P>i. Paragraph i; A. Start of A</P> '
        '<HD SOURCE="HD2">5(a) Access Device</HD> '
        '<P>1. Paragraph 111</P> '
        '<P>i. Content content</P> '
        '<P>ii. More content</P> '
        '<P>A. Aaaaah</P> '
        '<P><E T="03">1.</E> More info</P> '
        '<P><E T="03">2.</E> Second info</P> '
        '<P><E T="03">3. Keyterms</E></P> '
        '</APPENDIX>'
    )
    appendix_xml = etree.fromstring(xml)
    root = interpretations.build_supplement_tree('737', appendix_xml)
    self.assertEqual(['737', 'Interp'], root.label)
    self.assertEqual(1, len(root.children))

    section = root.children[0]
    self.assertEqual(['737', '5', 'Interp'], section.label)
    self.assertEqual(2, len(section.children))
    sect_1, sub_a = section.children

    # Paragraphs sitting directly under the section header: 1 -> i -> A
    sect_prefix = ['737', '5', 'Interp']
    self.assertEqual(sect_prefix + ['1'], sect_1.label)
    self.assertEqual(1, len(sect_1.children))

    sect_1_i = sect_1.children[0]
    self.assertEqual(sect_prefix + ['1', 'i'], sect_1_i.label)
    self.assertEqual(1, len(sect_1_i.children))

    sect_1_i_a = sect_1_i.children[0]
    self.assertEqual(sect_prefix + ['1', 'i', 'A'], sect_1_i_a.label)
    self.assertEqual(0, len(sect_1_i_a.children))

    # Paragraphs under the 5(a) header: 1 -> (i, ii) -> A -> (1, 2, 3)
    sub_a_prefix = ['737', '5', 'a', 'Interp']
    self.assertEqual(sub_a_prefix, sub_a.label)
    self.assertEqual(1, len(sub_a.children))

    sub_a_1 = sub_a.children[0]
    self.assertEqual(sub_a_prefix + ['1'], sub_a_1.label)
    self.assertEqual(2, len(sub_a_1.children))

    sub_a_1_i, sub_a_1_ii = sub_a_1.children
    self.assertEqual(sub_a_prefix + ['1', 'i'], sub_a_1_i.label)
    self.assertEqual(0, len(sub_a_1_i.children))
    self.assertEqual(sub_a_prefix + ['1', 'ii'], sub_a_1_ii.label)
    self.assertEqual(1, len(sub_a_1_ii.children))

    sub_a_1_ii_a = sub_a_1_ii.children[0]
    leaf_prefix = sub_a_prefix + ['1', 'ii', 'A']
    self.assertEqual(leaf_prefix, sub_a_1_ii_a.label)
    self.assertEqual(3, len(sub_a_1_ii_a.children))

    # The italicised markers must survive untouched in tagged_text
    leaf_1, leaf_2, leaf_3 = sub_a_1_ii_a.children
    expected_leaves = (
        (leaf_1, '1', '<E T="03">1.</E> More info'),
        (leaf_2, '2', '<E T="03">2.</E> Second info'),
        (leaf_3, '3', '<E T="03">3. Keyterms</E>'),
    )
    for leaf, marker, tagged in expected_leaves:
        self.assertEqual(leaf_prefix + [marker], leaf.label)
        self.assertEqual(leaf.tagged_text, tagged)
        self.assertEqual(0, len(leaf.children))