Exemplo n.º 1
0
    def read_section(self, node: Any) -> None:
        """Register a numbering section for ``node`` and load its levels.

        Nodes without an ``abstractNumId`` attribute are ignored. Otherwise a
        ``NumberingSetsSection`` is stored in ``self.collections`` under the
        integer id, every direct ``lvl`` child is passed to
        ``read_numbering_sets`` and, when there is no such child, the
        children are visited recursively with this same method.
        """
        attributes = XmlPreprocessor.get_clear_attributes(node)
        raw_id = attributes.get('abstractNumId')
        if not raw_id:
            return

        section_id = int(raw_id)
        section = NumberingSetsSection()
        # Anything other than an explicit '0' (including a missing attribute)
        # switches the flag on.
        restart_flag = attributes.get('restartNumberingAfterBreak') or ''
        section.restart_after_break = restart_flag != '0'
        self.collections[section_id] = section

        # Level definitions look like <w:lvl w:ilvl="0">.
        found_levels = False
        for child in node:
            if XmlPreprocessor.get_clear_tag(child) != 'lvl':
                continue
            self.read_numbering_sets(child, section)
            found_levels = True

        try:
            if not found_levels:
                # No level at this depth: keep searching further down.
                for child in node:
                    self.read_section(child)
        finally:
            # Runs even when the recursive descent raises.
            section.initialize()
Exemplo n.º 2
0
 def explore_paragraph_numpr(self, pr: DocParagraph, node: Any):
     """Copy list level and list number from the children of ``node`` to ``pr``.

     An ``ilvl`` child sets ``pr.list_level`` and a ``numId`` child sets
     ``pr.list_number``; both are read from the child's ``val`` attribute
     and converted with ``int``. Children with an empty or missing ``val``
     and children with any other tag are skipped.
     """
     # child tag -> attribute of ``pr`` that receives the integer value
     targets = {'ilvl': 'list_level', 'numId': 'list_number'}
     for elt in node:
         field = targets.get(XmlPreprocessor.get_clear_tag(elt))
         if field is None:
             continue
         raw = XmlPreprocessor.get_clear_attributes(elt).get('val')
         if raw:
             setattr(pr, field, int(raw))
Exemplo n.º 3
0
 def explore_paragraph_properties(self, pr: DocParagraph, node: Any):
     """Walk a paragraph-properties subtree and apply what it finds to ``pr``.

     ``pStyle`` children apply the style named by their ``val`` attribute
     (when it is non-empty), ``numPr`` children are delegated to
     ``explore_paragraph_numpr``, and every other child is searched
     recursively.
     """
     for elt in node:
         tag = XmlPreprocessor.get_clear_tag(elt)

         if tag == 'pStyle':
             style_name = XmlPreprocessor.get_clear_attributes(elt).get('val')
             if style_name:
                 self.apply_paragraph_style(pr, style_name)
             continue

         if tag == 'numPr':
             self.explore_paragraph_numpr(pr, elt)
             continue

         # Unknown wrapper: look for properties deeper in the tree.
         self.explore_paragraph_properties(pr, elt)
 def add_hyperlink(self, node: Any, container: List[DocItem]) -> None:
     """Build a hyperlink item from ``node`` and append it to ``container``.

     The link text comes from the first ``t`` child found by
     ``XmlPreprocessor.find_children_by_tag``; when there is none, or its
     text is empty, nothing is appended. The target is resolved from the
     node's ``id`` attribute (e.g. ``r:id="rId4"``) through
     ``self.relationship.rel_by_id``; an unknown id gives an empty string.
     """
     matches = XmlPreprocessor.find_children_by_tag(
         node, 't', first_only=True)
     label = matches[0].text if matches else None
     if not label:
         return

     hyperlink = self.make_hyperlink_instance()
     hyperlink.text = label

     relation_id = XmlPreprocessor.get_clear_attribute_val(node, 'id')
     if relation_id:
         # Fall back to '' when the relationship table has no such id.
         hyperlink.link = self.relationship.rel_by_id.get(relation_id) or ''

     container.append(hyperlink)
Exemplo n.º 5
0
 def explore_section(self, node: Any, style_set: StyleSets):
     """Collect aliases and list settings from a style subtree.

     ``aliases`` children store the comma-separated ``val`` attribute as a
     set on ``style_set.aliases``. ``ilvl`` and ``numId`` children store
     ``int(val)`` on the attribute of the same name. Any other child is
     explored recursively.

     ``int`` is applied to the raw attribute value without a fallback, so
     a value it cannot convert raises out of this method.
     """
     for child in node:
         tag = XmlPreprocessor.get_clear_tag(child)

         if tag == 'aliases':
             raw = XmlPreprocessor.get_clear_attribute_val(child, 'val')
             style_set.aliases = set((raw or '').split(','))
         elif tag in ('ilvl', 'numId'):
             raw = XmlPreprocessor.get_clear_attribute_val(child, 'val')
             # The target attribute on style_set has the same name as the tag.
             setattr(style_set, tag, int(raw))
         else:
             self.explore_section(child, style_set)
 def traverse_doc_tree(self, node: Any, items_container: List[DocItem]):
     """Fill ``items_container`` with the paragraphs and tables under ``node``.

     ``p`` children go to ``add_paragraph``, ``tbl`` children go to
     ``add_table``, and every other child is traversed recursively.
     """
     handlers = {
         'p': self.add_paragraph,
         'tbl': self.add_table,
     }
     for elt in node:
         tag = XmlPreprocessor.get_clear_tag(elt)
         # Tags without a dedicated handler are searched recursively.
         handle = handlers.get(tag, self.traverse_doc_tree)
         handle(elt, items_container)
Exemplo n.º 7
0
 def read_num_map(self, node: Any) -> None:
     """
     Record which section id a list number refers to.

     Expected input:

     <w:num w:numId="1">
         <w:abstractNumId w:val="1" />
     </w:num>

     The node's ``numId`` is the key and the ``val`` of its first
     ``abstractNumId`` child is the value stored in
     ``self.list_num_to_section_id``. Nothing is stored when ``numId`` is
     missing, when there is no ``abstractNumId`` child, or when that
     child's ``val`` is empty.
     """
     raw_num_id = XmlPreprocessor.get_clear_attribute_val(node, 'numId')
     if not raw_num_id:
         return
     list_number = int(raw_num_id)

     # Only the first abstractNumId child is ever consulted.
     reference = next(
         (child for child in node
          if XmlPreprocessor.get_clear_tag(child) == 'abstractNumId'),
         None)
     if reference is None:
         return

     raw_section_id = XmlPreprocessor.get_clear_attribute_val(reference, 'val')
     if raw_section_id:
         self.list_num_to_section_id[list_number] = int(raw_section_id)
    def traverse_tree(self, node) -> None:
        """Parse every ``Relationship`` element found at the shallowest depth.

        All direct ``Relationship`` children of ``node`` are passed to
        ``parse_relationship``. The children are traversed recursively only
        when ``node`` has no such child.
        """
        found_relationship = False
        for child in node:
            if XmlPreprocessor.get_clear_tag(child) != 'Relationship':
                continue
            self.parse_relationship(child)
            found_relationship = True

        if found_relationship:
            return

        for child in node:
            self.traverse_tree(child)
Exemplo n.º 9
0
    def read_numbering_sets(self, node: Any, sect: NumberingSetsSection):
        """Read one numbering level from ``node`` and append it to ``sect.sets``.

        The level index is taken from the node's ``ilvl`` attribute. The
        direct children ``start``, ``numFmt``, ``lvlText`` and ``lvlJc``
        fill the matching fields of a new ``NumberingSets`` from their
        ``val`` attribute, with fallbacks ``'0'``, ``'bullet'``, ``'*'`` and
        ``'left'`` when the value is empty. Other children are ignored.
        """
        level_settings = NumberingSets()
        raw_level = XmlPreprocessor.get_clear_attribute_val(node, 'ilvl')
        level_settings.level = int(raw_level)

        # child tag -> (NumberingSets attribute, fallback for an empty value)
        text_fields = {
            'numFmt': ('num_fmt', 'bullet'),
            'lvlText': ('lvl_text', '*'),
            'lvlJc': ('lvl_jc', 'left'),
        }

        for child in node:
            tag = XmlPreprocessor.get_clear_tag(child)
            if tag == 'start':
                raw_start = XmlPreprocessor.get_clear_attribute_val(child, 'val')
                level_settings.start = int(raw_start or '0')
            elif tag in text_fields:
                field, fallback = text_fields[tag]
                raw_value = XmlPreprocessor.get_clear_attribute_val(child, 'val')
                setattr(level_settings, field, raw_value or fallback)

        sect.sets.append(level_settings)
    def fill_table_rows(self, tbl: DocTable, node: Any):
        """Add the rows found under ``node`` to ``tbl``.

        Every direct ``tr`` child is passed to ``fill_table_row``. When
        ``node`` has no ``tr`` child, each child is searched recursively
        instead.
        """
        row_nodes_seen = 0
        for elt in node:
            if XmlPreprocessor.get_clear_tag(elt) == 'tr':
                row_nodes_seen += 1
                self.fill_table_row(tbl, elt)

        if row_nodes_seen == 0:
            # No row at this depth: look one level further down.
            for elt in node:
                self.fill_table_rows(tbl, elt)
Exemplo n.º 11
0
    def traverse_tree(self, node: Any) -> None:
        """Read every ``style`` element found at the shallowest depth.

        The elements of interest look like
        ``<w:style w:type="paragraph" w:styleId="Heading2">``. Each direct
        ``style`` child is passed to ``read_section``. The children are
        traversed recursively only when ``node`` has no ``style`` child.
        """
        found_style = False
        for child in node:
            if XmlPreprocessor.get_clear_tag(child) != 'style':
                continue
            self.read_section(child)
            found_style = True

        if not found_style:
            for child in node:
                self.traverse_tree(child)
    def fill_table_row(self, tbl: DocTable, node: Any):
        """Append exactly one ``DocTableRow`` for ``node`` to ``tbl`` and fill it.

        Direct ``tc`` children of ``node`` are passed to ``fill_table_cell``.
        When ``node`` has no direct ``tc`` child, the search continues in its
        descendants, and any cells found there are added to the same row.

        The previous implementation recursed into this method itself, so
        every descendant visited during the search appended another empty
        row to ``tbl.rows``. The search now runs in an inner helper that
        never creates rows.

        :param tbl: table receiving the new row in ``tbl.rows``.
        :param node: row element whose cells are collected.
        """
        row = DocTableRow()
        tbl.rows.append(row)

        def collect_cells(parent: Any) -> None:
            # Fill ``row`` from the shallowest depth at which cells appear.
            found_cell = False
            for elt in parent:
                if XmlPreprocessor.get_clear_tag(elt) == 'tc':
                    found_cell = True
                    self.fill_table_cell(row, elt)

            if found_cell:
                return

            for elt in parent:
                collect_cells(elt)

        collect_cells(node)
 def explore_paragraph(self, pr: DocParagraph, node: Any):
     """Collect the properties and text items of a paragraph subtree.

     ``pPr`` children are passed to ``explore_paragraph_properties`` and
     ``hyperlink`` children to ``add_hyperlink``. A ``tab`` child appends a
     tab character and a ``t`` child with non-empty text appends that text
     to ``pr.text_items``. Every other child, including ``r``, is explored
     recursively.
     """
     for child in node:
         tag = XmlPreprocessor.get_clear_tag(child)

         if tag == 'pPr':
             self.explore_paragraph_properties(pr, child)
             continue

         if tag == 'hyperlink':
             self.add_hyperlink(child, pr.text_items)
             continue

         if tag == 'tab':
             pr.text_items.append(DocText('\t'))
             continue

         if tag == 't':
             if child.text:
                 pr.text_items.append(DocText(child.text))
             continue

         # 'r' and any other wrapper element: keep searching inside it.
         self.explore_paragraph(pr, child)
Exemplo n.º 14
0
    def traverse_tree(self, node: Any) -> None:
        """Read numbering sections and list-number mappings below ``node``.

        Direct ``abstractNum`` children, e.g.
        ``<w:abstractNum w:abstractNumId="1" restartNumberingAfterBreak="0">``,
        go to ``read_section`` and direct ``num`` children go to
        ``read_num_map``. The recursion stops at this depth only when both
        kinds of child were seen; otherwise every child is traversed too.
        """
        readers = {
            'abstractNum': self.read_section,
            'num': self.read_num_map,
        }
        kinds_seen = set()
        for child in node:
            tag = XmlPreprocessor.get_clear_tag(child)
            reader = readers.get(tag)
            if reader is None:
                continue
            reader(child)
            kinds_seen.add(tag)

        # Descend unless BOTH kinds were present among the direct children.
        if len(kinds_seen) == len(readers):
            return

        for child in node:
            self.traverse_tree(child)
Exemplo n.º 15
0
 def read_section(self, node: Any) -> None:
     """
     Register the style described by ``node`` in ``self.sets``.

     Expected input:

     <w:style w:type="paragraph" w:styleId="Heading2">
         <w:aliases w:val="1.1,2nd,B Sub/Bold,B Sub/Bold1,B Sub/Bold11,B Sub/Bold12,B Sub/Bold13" />
         <w:pPr>
             <w:keepNext />
             <w:numPr>
                 <w:ilvl w:val="1" />
                 <w:numId w:val="19" />
             </w:numPr>
         </w:pPr>
     </w:style>

     Nodes without a ``styleId`` attribute are ignored. Otherwise a new
     ``StyleSets`` is stored under that id and filled by
     ``explore_section``. If exploring the node fails, the entry is removed
     again and the error is not propagated, so a style that cannot be read
     is skipped rather than left half-filled.
     """
     style_id = XmlPreprocessor.get_clear_attribute_val(node, 'styleId')
     if not style_id:
         return
     style_set = StyleSets()
     style_set.styleId = style_id
     self.sets[style_id] = style_set
     try:
         self.explore_section(node, style_set)
     except Exception:
         # Best effort: drop the style that could not be read. Catching
         # Exception instead of using a bare ``except`` lets
         # KeyboardInterrupt and SystemExit propagate.
         del self.sets[style_id]
Exemplo n.º 16
0
 def test_remove_namespace(self):
     # A name qualified with a "{...}" prefix and a bare name must both
     # come back as the bare local name.
     qualified = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main"}val'
     for raw_name in (qualified, 'val'):
         self.assertEqual('val', XmlPreprocessor.remove_namespace(raw_name))