Python build_supplement_tree示例，regparser.tree.xml_parser.interpretations.build_supplement_tree Python示例

示例#1

0

显示文件

文件： tree_xml_parser_interpretations_tests.py 项目： govtmirror/regulations-parser-1

    def test_build_supplement_tree_appendix_paragraphs(self):
        """Paragraph headers below an appendix header nest under that appendix.

        The ``(b)`` header and the ``(b)(5)`` header both follow "Appendix H";
        the assertions expect a single H node holding a single H-b node, whose
        two children are numbered paragraph 1 and the H-b-5 interpretation.
        """
        supplement_xml = """<APPENDIX>
            <HD SOURCE="HED">
                Supplement I to Part 737-Official Interpretations</HD>
            <HD SOURCE="HD2">Appendix H</HD>
            <HD SOURCE="HD3">(b) bbbbbbb</HD>
            <P>1. Paragraph b</P>
            <HD SOURCE="HD3">(b)(5) b5b5b5</HD>
            <P>1. Paragraph b5</P>
        </APPENDIX>"""
        parsed = etree.fromstring(supplement_xml)
        root = interpretations.build_supplement_tree('737', parsed)
        self.assertEqual(['737', 'Interp'], root.label)
        self.assertEqual(1, len(root.children))

        # Only one header-level node is expected: the appendix itself.
        appendix_h = root.children[0]
        self.assertEqual(['737', 'H', 'Interp'], appendix_h.label)
        self.assertEqual(1, len(appendix_h.children))

        interp_b = appendix_h.children[0]
        self.assertEqual(['737', 'H', 'b', 'Interp'], interp_b.label)
        self.assertEqual(2, len(interp_b.children))

        # First child is the numbered paragraph, second the deeper header.
        first_paragraph, interp_b5 = interp_b.children
        self.assertEqual(['737', 'H', 'b', 'Interp', '1'],
                         first_paragraph.label)
        self.assertEqual(['737', 'H', 'b', '5', 'Interp'], interp_b5.label)

示例#2

0

显示文件

文件： tree_xml_parser_interpretations_tests.py 项目： govtmirror/regulations-parser-1

    def test_build_supplement_intro_section(self):
        """Integration test: an "Introduction" header ahead of the sections.

        The assertions expect three top-level children (the introduction,
        section 13 and appendix G), with the introduction's two ``<P>``
        elements collapsed into one child that has no children of its own.
        """
        supplement_xml = """<APPENDIX>
            <HD SOURCE="HED">
                Supplement I to Part 737-Official Interpretations</HD>
            <HD SOURCE="HD1">Introduction</HD>
            <P>1. Some content. (a) Badly named</P>
            <P>(b) Badly named</P>
            <HD SOURCE="HD1">Subpart A</HD>
            <HD SOURCE="HD2">Section 737.13</HD>
            <P><E>13(a) Some Stuff!</E></P>
            <P>1. 131313</P>
            <HD SOURCE="HD2">Appendix G</HD>
            <P>1. G</P>
        </APPENDIX>"""
        root = interpretations.build_supplement_tree(
            '737', etree.fromstring(supplement_xml))
        self.assertEqual(['737', 'Interp'], root.label)
        self.assertEqual(3, len(root.children))
        intro, section_13, appendix_g = root.children

        self.assertEqual(['737', 'Interp', 'h1'], intro.label)
        self.assertEqual(['737', '13', 'Interp'], section_13.label)
        self.assertEqual(['737', 'G', 'Interp'], appendix_g.label)

        # Both introduction paragraphs are expected inside a single node,
        # separated by a blank line.
        merged_intro_text = ('1. Some content. (a) Badly named\n\n'
                             '(b) Badly named')
        self.assertEqual(len(intro.children), 1)
        self.assertEqual(merged_intro_text, intro.children[0].text.strip())
        self.assertEqual(len(intro.children[0].children), 0)

        self.assertEqual(1, len(section_13.children))
        self.assertEqual('13(a) Some Stuff!', section_13.children[0].title)

示例#3

0

显示文件

文件： tree_xml_parser_interpretations_tests.py 项目： govtmirror/regulations-parser-1

    def test_build_supplement_tree_spacing(self):
        """Integration test: spacing around inline tags in paragraph text.

        The input butts markers against ``<E>`` tags and splits a phrase with
        ``<PRTPAGE />``; the expected texts all carry a single space in those
        positions.  Each paragraph is expected to nest under the previous one.
        """
        supplement_xml = """<APPENDIX>
            <HD SOURCE="HED">
                Supplement I to Part 737-Official Interpretations</HD>
            <HD SOURCE="HD2">Section 737.5 NASCAR</HD>
            <P>1.<E T="03">Phrase</E>. More Content</P>
            <P>i. I like<PRTPAGE />ice cream</P>
            <P>A. Aaaaah</P>
            <P><E T="03">1.</E>More info</P>
        </APPENDIX>"""
        root = interpretations.build_supplement_tree(
            '737', etree.fromstring(supplement_xml))
        self.assertEqual(['737', 'Interp'], root.label)
        self.assertEqual(1, len(root.children))

        node = root.children[0]
        self.assertEqual(1, len(node.children))

        # (stripped text, number of children) for each level, outermost
        # first; every level is the only child of the one before it.
        expected_chain = [
            ("1. Phrase. More Content", 1),
            ("i. I like ice cream", 1),
            ("A. Aaaaah", 1),
            ("1. More info", 0),
        ]
        for expected_text, expected_child_count in expected_chain:
            node = node.children[0]
            self.assertEqual(node.text.strip(), expected_text)
            self.assertEqual(expected_child_count, len(node.children))

示例#4

0

显示文件

文件： tree_xml_parser_interpretations_tests.py 项目： govtmirror/regulations-parser-1

    def test_build_supplement_tree_skip_levels(self):
        """A header that jumps straight to ``5(a)(1)(i)`` gets its ancestors.

        The assertions expect nodes for 5-a and 5-a-1 to exist even though the
        XML has no headers for them, each with exactly one child, and expect
        5-b to sit beside 5-a under section 5.
        """
        supplement_xml = """<APPENDIX>
            <HD SOURCE="HED">
                Supplement I to Part 737-Official Interpretations</HD>
            <HD SOURCE="HD2">Section 737.5 NASCAR</HD>
            <HD SOURCE="HD2">5(a)(1)(i) Access Device</HD>
            <P>1. Paragraph 111</P>
            <HD SOURCE="HD2">5(b) Other Devices</HD>
            <P>1. Paragraph 222</P>
        </APPENDIX>"""
        root = interpretations.build_supplement_tree(
            '737', etree.fromstring(supplement_xml))
        self.assertEqual(['737', 'Interp'], root.label)
        self.assertEqual(1, len(root.children))

        section_5 = root.children[0]
        self.assertEqual(['737', '5', 'Interp'], section_5.label)
        self.assertEqual(2, len(section_5.children))
        interp_a, interp_b = section_5.children

        # Walk down the 5(a) branch one level at a time.
        self.assertEqual(['737', '5', 'a', 'Interp'], interp_a.label)
        self.assertEqual(1, len(interp_a.children))
        interp_a_1 = interp_a.children[0]

        self.assertEqual(['737', '5', 'a', '1', 'Interp'], interp_a_1.label)
        self.assertEqual(1, len(interp_a_1.children))
        interp_a_1_i = interp_a_1.children[0]

        self.assertEqual(['737', '5', 'a', '1', 'i', 'Interp'],
                         interp_a_1_i.label)
        self.assertEqual(1, len(interp_a_1_i.children))

        # The sibling 5(b) branch.
        self.assertEqual(['737', '5', 'b', 'Interp'], interp_b.label)
        self.assertEqual(1, len(interp_b.children))

示例#5

0

显示文件

文件： appendices.py 项目： khandelwal/regulations-parser

def build_non_reg_text(reg_xml, reg_part):
    """Build the trees for the non-regulation text (appendices, supplement).

    ``reg_xml`` is either XML markup, which is parsed with
    ``etree.fromstring``, or an already-parsed object exposing ``xpath``,
    which is used as-is.  ``reg_part`` is forwarded unchanged to the builders.

    Each ``APPENDIX`` element found under a ``PART`` yields one entry in the
    returned list: when its title contains both "Supplement" and "Part" the
    entry comes from ``build_supplement_tree``, otherwise from
    ``process_appendix``.  Entries keep the order in which ``xpath`` returned
    the elements.
    """
    # ``unicode`` exists only on Python 2.  Referencing it unconditionally
    # raised NameError on Python 3 whenever ``reg_xml`` was not a ``str``
    # (e.g. a parsed tree), so resolve the markup types defensively.
    try:
        markup_types = (str, unicode)  # noqa: F821 - Python 2 only
    except NameError:
        # Python 3: ``bytes`` plays the role Python 2's ``str`` did.
        markup_types = (str, bytes)

    if isinstance(reg_xml, markup_types):
        doc_root = etree.fromstring(reg_xml)
    else:
        doc_root = reg_xml

    children = []
    for non_reg_sect in doc_root.xpath('//PART//APPENDIX'):
        section_title = get_app_title(non_reg_sect)
        # The supplement (interpretations) is stored as an APPENDIX too; it
        # is told apart from ordinary appendices by its title.
        if 'Supplement' in section_title and 'Part' in section_title:
            children.append(build_supplement_tree(reg_part, non_reg_sect))
        else:
            children.append(process_appendix(non_reg_sect, reg_part))

    return children

示例#6

0

显示文件

文件： appendices.py 项目： pombreda/regulations-parser

def build_non_reg_text(reg_xml, reg_part):
    """Build the trees for the non-regulation text (appendices, supplement).

    ``reg_xml`` is either XML markup, which is parsed with
    ``etree.fromstring``, or an already-parsed object exposing ``xpath``,
    which is used as-is.  ``reg_part`` is forwarded unchanged to the builders.

    Each ``APPENDIX`` element found under a ``PART`` yields one entry in the
    returned list: when its title contains both "Supplement" and "Part" the
    entry comes from ``build_supplement_tree``, otherwise from
    ``process_appendix``.  Entries keep the order in which ``xpath`` returned
    the elements.
    """
    # ``unicode`` exists only on Python 2.  Referencing it unconditionally
    # raised NameError on Python 3 whenever ``reg_xml`` was not a ``str``
    # (e.g. a parsed tree), so resolve the markup types defensively.
    try:
        markup_types = (str, unicode)  # noqa: F821 - Python 2 only
    except NameError:
        # Python 3: ``bytes`` plays the role Python 2's ``str`` did.
        markup_types = (str, bytes)

    if isinstance(reg_xml, markup_types):
        doc_root = etree.fromstring(reg_xml)
    else:
        doc_root = reg_xml

    children = []
    for non_reg_sect in doc_root.xpath('//PART//APPENDIX'):
        section_title = get_app_title(non_reg_sect)
        # The supplement (interpretations) is stored as an APPENDIX too; it
        # is told apart from ordinary appendices by its title.
        if 'Supplement' in section_title and 'Part' in section_title:
            children.append(build_supplement_tree(reg_part, non_reg_sect))
        else:
            children.append(process_appendix(non_reg_sect, reg_part))

    return children

示例#7

0

显示文件

文件： appendices.py 项目： govtmirror/regulations-parser-1

def build_non_reg_text(reg_xml, reg_part):
    """Return one tree per ``APPENDIX`` element found under a ``PART``.

    ``reg_xml`` may be XML text (parsed here with ``etree.fromstring``) or an
    already-parsed object exposing ``xpath``.  An appendix whose title
    mentions both "Supplement" and "Part" is built with
    ``build_supplement_tree``; every other one goes through
    ``process_appendix``.  ``reg_part`` is passed through to both builders.
    """
    doc_root = reg_xml
    if isinstance(reg_xml, six.string_types):
        doc_root = etree.fromstring(reg_xml)

    appendix_elements = doc_root.xpath('//PART//APPENDIX')
    logger.debug("Non Reg sections: %r", appendix_elements)

    def build_one(appendix):
        """Choose the builder for a single APPENDIX element by its title."""
        title = get_app_title(appendix)
        logger.debug("Building non reg sect: %s", title)
        is_supplement = 'Supplement' in title and 'Part' in title
        if is_supplement:
            return build_supplement_tree(reg_part, appendix)
        return process_appendix(appendix, reg_part)

    return [build_one(appendix) for appendix in appendix_elements]

示例#8

0

显示文件

文件： appendices.py 项目： anthonygarvan/regulations-parser

def build_non_reg_text(reg_xml, reg_part):
    """Build the appendix and supplement trees for a regulation part.

    Accepts either XML text, which is parsed with ``etree.fromstring``, or an
    object that already exposes ``xpath``.  For each ``APPENDIX`` under a
    ``PART``, the title decides the builder: titles containing both
    "Supplement" and "Part" use ``build_supplement_tree``; the rest use
    ``process_appendix``.  The results are returned as a list.
    """
    needs_parsing = isinstance(reg_xml, six.string_types)
    doc_root = etree.fromstring(reg_xml) if needs_parsing else reg_xml

    appendices = doc_root.xpath('//PART//APPENDIX')
    logger.debug("Non Reg sections: %r", appendices)

    trees = []
    for appendix in appendices:
        title = get_app_title(appendix)
        logger.debug("Building non reg sect: %s", title)
        # Anything that is not the supplement is treated as a plain appendix.
        if not ('Supplement' in title and 'Part' in title):
            trees.append(process_appendix(appendix, reg_part))
            continue
        trees.append(build_supplement_tree(reg_part, appendix))

    return trees

示例#9

0

显示文件

文件： tree_xml_parser_interpretations_tests.py 项目： govtmirror/regulations-parser-1

    def test_build_supplement_tree_repeats(self):
        """Integration test: a combined "Appendices G and H" header.

        The combined header is expected to produce its own ``G_H`` node, with
        the later individual "Appendix G" and "Appendix H" headers producing
        separate sibling nodes.
        """
        supplement_xml = """<APPENDIX>
            <HD SOURCE="HED">
                Supplement I to Part 737-Official Interpretations</HD>
            <HD SOURCE="HD2">Appendices G and H-Content</HD>
            <P>1. G:H</P>
            <HD SOURCE="HD2">Appendix G</HD>
            <P>1. G</P>
            <HD SOURCE="HD2">Appendix H</HD>
            <P>1. H</P>
        </APPENDIX>"""
        root = interpretations.build_supplement_tree(
            '737', etree.fromstring(supplement_xml))
        self.assertEqual(['737', 'Interp'], root.label)
        self.assertEqual(3, len(root.children))

        # Expected labels in the same order as the headers in the XML.
        expected_labels = [
            ['737', 'G_H', 'Interp'],
            ['737', 'G', 'Interp'],
            ['737', 'H', 'Interp'],
        ]
        for expected, child in zip(expected_labels, root.children):
            self.assertEqual(expected, child.label)

示例#10

0

显示文件

文件： tree_xml_parser_interpretations_tests.py 项目： govtmirror/regulations-parser-1

    def test_build_supplement_tree(self):
        """Integration test: full nesting below a section and a 5(a) header.

        Checks the label and child count of every node: the numbered, roman
        and upper-case paragraphs under "Section 737.5", then the same kind of
        chain under "5(a)", ending in three emphasised leaves whose
        ``tagged_text`` keeps the original ``<E>`` markup.
        """
        supplement_xml = """<APPENDIX>
            <HD SOURCE="HED">
                Supplement I to Part 737-Official Interpretations</HD>
            <HD SOURCE="HD2">Section 737.5 NASCAR</HD>
            <P>1. Paragraph 1</P>
            <P>i. Paragraph i; A. Start of A</P>
            <HD SOURCE="HD2">5(a) Access Device</HD>
            <P>1. Paragraph 111</P>
            <P>i. Content content</P>
            <P>ii. More content</P>
            <P>A. Aaaaah</P>
            <P><E T="03">1.</E> More info</P>
            <P><E T="03">2.</E> Second info</P>
            <P><E T="03">3. Keyterms</E></P>
        </APPENDIX>"""
        root = interpretations.build_supplement_tree(
            '737', etree.fromstring(supplement_xml))
        self.assertEqual(['737', 'Interp'], root.label)
        self.assertEqual(1, len(root.children))

        section_5 = root.children[0]
        self.assertEqual(['737', '5', 'Interp'], section_5.label)
        self.assertEqual(2, len(section_5.children))

        # Paragraphs that sit directly under the section header.
        sect_p1, interp_a = section_5.children
        self.assertEqual(['737', '5', 'Interp', '1'], sect_p1.label)
        self.assertEqual(1, len(sect_p1.children))
        sect_p1_i = sect_p1.children[0]
        self.assertEqual(['737', '5', 'Interp', '1', 'i'], sect_p1_i.label)
        self.assertEqual(1, len(sect_p1_i.children))
        sect_p1_i_A = sect_p1_i.children[0]
        self.assertEqual(['737', '5', 'Interp', '1', 'i', 'A'],
                         sect_p1_i_A.label)
        self.assertEqual(0, len(sect_p1_i_A.children))

        # Paragraphs under the 5(a) header.
        self.assertEqual(['737', '5', 'a', 'Interp'], interp_a.label)
        self.assertEqual(1, len(interp_a.children))
        a_p1 = interp_a.children[0]
        self.assertEqual(['737', '5', 'a', 'Interp', '1'], a_p1.label)
        self.assertEqual(2, len(a_p1.children))
        a_p1_i, a_p1_ii = a_p1.children
        self.assertEqual(['737', '5', 'a', 'Interp', '1', 'i'], a_p1_i.label)
        self.assertEqual(0, len(a_p1_i.children))

        self.assertEqual(['737', '5', 'a', 'Interp', '1', 'ii'],
                         a_p1_ii.label)
        self.assertEqual(1, len(a_p1_ii.children))
        a_p1_ii_A = a_p1_ii.children[0]
        self.assertEqual(['737', '5', 'a', 'Interp', '1', 'ii', 'A'],
                         a_p1_ii_A.label)
        self.assertEqual(3, len(a_p1_ii_A.children))

        # (final label element, expected tagged_text) for the three leaves.
        expected_leaves = [
            ('1', '<E T="03">1.</E> More info'),
            ('2', '<E T="03">2.</E> Second info'),
            ('3', '<E T="03">3. Keyterms</E>'),
        ]
        for (marker, tagged), leaf in zip(expected_leaves,
                                          a_p1_ii_A.children):
            self.assertEqual(
                ['737', '5', 'a', 'Interp', '1', 'ii', 'A', marker],
                leaf.label)
            self.assertEqual(leaf.tagged_text, tagged)
            self.assertEqual(0, len(leaf.children))