def test_process_inner_child(self):
        # A paragraph carrying both "1." and "i." markers, a STARS element,
        # an "A." paragraph and an emphasized "1." paragraph are expected to
        # nest as a single chain: 1 -> 1-i -> 1-i-A -> 1-i-A-1.
        markup = """
        <ROOT>
            <HD>Title</HD>
            <P>1. 111. i. iii</P>
            <STARS />
            <P>A. AAA</P>
            <P><E T="03">1.</E> eee</P>
        </ROOT>"""
        root = etree.fromstring(markup)
        header = root.xpath('//HD')[0]
        node_stack = tree_utils.NodeStack()
        interpretations.process_inner_children(node_stack, header)
        # Keep unwinding until the stack reports a size of one.
        while node_stack.size() > 1:
            node_stack.unwind()

        bottom_level = node_stack.m_stack[0]
        level_one = bottom_level[0][1]
        self.assertEqual(['1'], level_one.label)
        self.assertEqual(1, len(level_one.children))

        level_two = level_one.children[0]
        self.assertEqual(['1', 'i'], level_two.label)
        self.assertEqual(level_two.text.strip(), 'i. iii')
        self.assertEqual(1, len(level_two.children))

        level_three = level_two.children[0]
        self.assertEqual(['1', 'i', 'A'], level_three.label)
        self.assertEqual(1, len(level_three.children))

        level_four = level_three.children[0]
        self.assertEqual(['1', 'i', 'A', '1'], level_four.label)
        self.assertEqual(0, len(level_four.children))
 def test_process_inner_child_has_citation(self):
     # The "2.i." inside the cited comment reference is expected not to be
     # picked up as a paragraph marker, so the resulting node is childless.
     markup = """
     <ROOT>
         <HD>Title</HD>
         <P>1. Something something see comment 22(a)-2.i. please</P>
     </ROOT>"""
     root = etree.fromstring(markup)
     header = root.xpath('//HD')[0]
     node_stack = tree_utils.NodeStack()
     interpretations.process_inner_children(node_stack, header)
     # Keep unwinding until the stack reports a size of one.
     while node_stack.size() > 1:
         node_stack.unwind()
     bottom_level = node_stack.m_stack[0]
     parsed = bottom_level[0][1]
     self.assertEqual(0, len(parsed.children))
 def test_process_inner_child_incorrect_xml(self):
     # The markup mixes emphasized markers ("1.", "2.") with a plain "i."
     # paragraph; after unwinding, the bottom level of the stack is
     # expected to hold two entries.
     markup = """
     <ROOT>
         <HD>Title</HD>
         <P><E T="03">1.</E> 111</P>
         <P>i. iii</P>
         <P><E T="03">2.</E> 222 Incorrect Content</P>
     </ROOT>"""
     root = etree.fromstring(markup)
     header = root.xpath('//HD')[0]
     node_stack = tree_utils.NodeStack()
     interpretations.process_inner_children(node_stack, header)
     # Keep unwinding until the stack reports a size of one.
     while node_stack.size() > 1:
         node_stack.unwind()
     bottom_level = node_stack.m_stack[0]
     self.assertEqual(2, len(bottom_level))
 def test_process_inner_child_no_marker(self):
     # A trailing paragraph without any marker ("Howdy Howdy") is expected
     # to be folded into the text of the preceding "i." node rather than
     # producing a node of its own.
     markup = """
         <ROOT>
             <HD>Title</HD>
             <P>1. 111</P>
             <P>i. iii</P>
             <P>Howdy Howdy</P>
         </ROOT>"""
     root = etree.fromstring(markup)
     header = root.xpath('//HD')[0]
     node_stack = tree_utils.NodeStack()
     interpretations.process_inner_children(node_stack, header)
     # Keep unwinding until the stack reports a size of one.
     while node_stack.size() > 1:
         node_stack.unwind()
     bottom_level = node_stack.m_stack[0]
     outer = bottom_level[0][1]
     self.assertEqual(1, len(outer.children))
     inner = outer.children[0]
     self.assertEqual(0, len(inner.children))
     self.assertEqual(inner.text.strip(), "i. iii\n\nHowdy Howdy")
 def test_process_inner_child_collapsed_i(self):
     # The "i." marker shares a paragraph with "1." and its keyterm; it is
     # expected to become a child of node 1, with the following "ii."
     # paragraph as its sibling.
     markup = """
     <ROOT>
         <HD>Title</HD>
         <P>1. <E T="03">Keyterm text</E> i. Content content</P>
         <P>ii. Other stuff</P>
     </ROOT>"""
     root = etree.fromstring(markup)
     header = root.xpath('//HD')[0]
     node_stack = tree_utils.NodeStack()
     interpretations.process_inner_children(node_stack, header)
     # Keep unwinding until the stack reports a size of one.
     while node_stack.size() > 1:
         node_stack.unwind()
     bottom_level = node_stack.m_stack[0]
     parsed = bottom_level[0][1]
     self.assertEqual(['1'], parsed.label)
     self.assertEqual(2, len(parsed.children))
     first_child = parsed.children[0]
     self.assertEqual(['1', 'i'], first_child.label)
     self.assertEqual(0, len(first_child.children))
     second_child = parsed.children[1]
     self.assertEqual(['1', 'ii'], second_child.label)
     self.assertEqual(0, len(second_child.children))
    def test_process_inner_child_space(self):
        # The "A." in "See country A. Not that country" sits mid-sentence
        # and is expected not to start a new node, leaving 1-i childless.
        markup = """
        <ROOT>
            <HD>Title</HD>
            <P>1. 111</P>
            <P>i. See country A. Not that country</P>
        </ROOT>"""
        root = etree.fromstring(markup)
        header = root.xpath('//HD')[0]
        node_stack = tree_utils.NodeStack()
        interpretations.process_inner_children(node_stack, header)
        # Keep unwinding until the stack reports a size of one.
        while node_stack.size() > 1:
            node_stack.unwind()
        bottom_level = node_stack.m_stack[0]
        outer = bottom_level[0][1]
        self.assertEqual(['1'], outer.label)
        self.assertEqual(1, len(outer.children))

        inner = outer.children[0]
        self.assertEqual(['1', 'i'], inner.label)
        self.assertEqual(0, len(inner.children))
 def test_process_inner_child_stars_and_inline(self):
     # STARS elements and an inline "* * *" surround the paragraphs; the
     # "xi." paragraph is still expected to end up as the only child of
     # node 2.
     markup = """
     <ROOT>
         <HD>Title</HD>
         <STARS />
         <P>2. Content. * * *</P>
         <STARS />
         <P>xi. Content</P>
         <STARS />
     </ROOT>"""
     root = etree.fromstring(markup)
     header = root.xpath('//HD')[0]
     node_stack = tree_utils.NodeStack()
     interpretations.process_inner_children(node_stack, header)
     # Keep unwinding until the stack reports a size of one.
     while node_stack.size() > 1:
         node_stack.unwind()
     bottom_level = node_stack.m_stack[0]
     parsed = bottom_level[0][1]
     self.assertEqual(['2'], parsed.label)
     self.assertEqual(1, len(parsed.children))
     only_child = parsed.children[0]
     self.assertEqual(['2', 'xi'], only_child.label)
     self.assertEqual(0, len(only_child.children))
示例#8
0
def process_without_headers(cfr_part, parent_xml, amended_labels):
    """Sometimes, we only get a list of paragraphs that have changes, but no
    header indicating with which sections they are associated. Accommodate
    by trying to match up amended_labels with paragraphs.

    Labels are matched in order: the next unmatched label is paired with the
    first remaining child whose text starts with that label's final
    component followed by a period.

    :param cfr_part: value used as the first label component of the root
        interpretation node.
    :param parent_xml: element whose children are the candidate paragraphs.
        It is passed through ``standardize_xml``; the resulting element has
        its trailing children removed as each match is processed.
    :param amended_labels: amendments to consider; only those accepted by
        ``_is_interp_amend`` contribute a label.
    :returns: the first element of ``treeify``'s result for the collected
        nodes, or ``None`` when no paragraph matched any label.
    """
    parent_xml = standardize_xml(parent_xml)

    relevant_labels = [al.label for al in amended_labels
                       if _is_interp_amend(al)]
    label_indices = []
    for idx, child in enumerate(parent_xml):
        text = tree_utils.get_node_text(child)
        # Only look for the next label that has not been located yet.
        if len(relevant_labels) > len(label_indices):
            marker = relevant_labels[len(label_indices)][-1] + '.'
            if text.startswith(marker):
                label_indices.append(idx)

    # Materialize the pairs: on Python 3 ``zip`` returns an iterator, which
    # ``reversed`` rejects with a TypeError. zip also drops any labels for
    # which no paragraph was found.
    label_index_pairs = list(zip(relevant_labels, label_indices))
    nodes = []
    #   Reverse it so we can delete from the bottom
    for label, idx in reversed(label_index_pairs):
        stack = tree_utils.NodeStack()
        prefix = label[:label.index(Node.INTERP_MARK) + 1]
        section = Node(node_type=Node.INTERP, label=prefix)
        stack.add(2, section)
        # NOTE(review): for idx == 0 this indexes parent_xml[-1] (the last
        # child) -- confirm whether a match at position 0 can occur.
        interpretations.process_inner_children(stack, parent_xml[idx - 1])
        while stack.size() > 1:
            stack.unwind()

        nodes.append(stack.m_stack[0][0][1])

        # delete the tail; len(element) counts its children, avoiding the
        # deprecated getchildren() call
        while len(parent_xml) > idx:
            parent_xml.remove(parent_xml[idx])

    if not nodes:
        return None

    nodes.append(Node(node_type=Node.INTERP,
                      label=[cfr_part, Node.INTERP_MARK]))
    #   Reverse it again into normal flow
    return treeify(list(reversed(nodes)))[0]