示例#1
0
def _clear_context(elem: etree._Element) -> None:
    """Clear ``elem`` and delete the siblings that precede it.

    ``elem`` is reset in place with ``clear()``; afterwards the first child
    of its parent is deleted repeatedly until ``elem`` has no previous
    sibling left.

    Args:
        elem: The element to clear.
    """
    elem.clear()

    parent = elem.getparent()
    if parent is None:
        # A root element can still report previous siblings (document-level
        # comments or processing instructions), but it has no parent to
        # delete them from; ``del None[0]`` would raise ``TypeError``.
        return

    # Each deletion removes the earliest remaining sibling, so the loop ends
    # once ``elem`` has become the parent's first child.
    while elem.getprevious() is not None:
        del parent[0]
示例#2
0
def cleanup(elem: etree._Element) -> None:
    """Reset ``elem`` and remove every sibling located before it.

    Args:
        elem: The element to reset; its preceding siblings are deleted from
            the parent element.
    """
    elem.clear()

    container = elem.getparent()
    if container is None:
        # Without a parent there is nothing to delete from. A root element
        # preceded by a top-level comment or processing instruction would
        # otherwise hit ``del None[0]`` and raise ``TypeError``.
        return

    while elem.getprevious() is not None:
        del container[0]  # clean up preceding siblings
示例#3
0
    def _add_kobo_spans_to_node(
        self, node: etree._Element, name: str
    ) -> etree._Element:
        """Rebuild ``node`` so that its text content is wrapped in Kobo spans.

        The node is emptied and repopulated: its attributes are restored, its
        text and the tail of every child are handed to
        ``_append_kobo_spans_from_text``, and each child is processed
        recursively before being appended again.

        Args:
            node: The element to process. ``None``, comments and processing
                instructions are returned without further processing.
            name: Key into ``self.paragraph_counter``; also used as the prefix
                of the debug log messages.

        Returns:
            ``node`` itself, or a new ``span`` element wrapping ``node`` when
            its tag is listed in ``SPECIAL_TAGS``.
        """
        # Comments and processing instructions (and a missing node) are left
        # alone; an existing node only loses its tail.
        if node is None or isinstance(
            node, (etree._Comment, etree._ProcessingInstruction)
        ):
            if node is not None:
                node.tail = None
            self.log.debug(f"[{name}] Skipping comment/ProcessingInstruction node")
            return node

        # Extract the tag name without its optional "{namespace}" prefix.
        tag_match = re.search(r"^(?:\{[^\}]+\})?(\w+)$", node.tag)
        if tag_match:
            local_name = tag_match.group(1)

            # Tags on the skip list are returned untouched.
            if local_name in SKIPPED_TAGS:
                self.log.debug(f"[{name}] Skipping '{local_name}' tag")
                return node

            # Special tags are wrapped whole in a single span; their children
            # are not descended into.
            if local_name in SPECIAL_TAGS:
                self.log.debug(
                    f"[{name}] Wrapping '{local_name}' tag and ignoring children"
                )
                wrapper = etree.Element(
                    f"{{{XHTML_NAMESPACE}}}span",
                    attrib={
                        "id": f"kobo.{self.paragraph_counter[name]}.1",
                        "class": "koboSpan",
                    },
                )
                wrapper.append(node)
                return wrapper

        # Snapshot everything needed to rebuild the node before wiping it.
        original_text = node.text
        original_children = deepcopy(node.getchildren())
        original_attrs = {key: node.get(key) for key in node.keys()}

        # Start from an empty node, then put the attributes back.
        node.clear()
        for attr_name, attr_value in original_attrs.items():
            node.set(attr_name, attr_value)

        # Leading text: turned into spans, or restored verbatim when no span
        # was added.
        if node_text_present := original_text is not None:
            pass
        if node_text_present:
            if self._append_kobo_spans_from_text(node, original_text, name):
                self.paragraph_counter[name] += 1
            else:
                node.text = original_text

        # Children: each one is processed without its tail, then the tail is
        # handled as text belonging to the current node.
        for child in original_children:
            trailing_text = child.tail
            child.tail = None
            node.append(self._add_kobo_spans_to_node(child, name))

            if trailing_text is None:
                continue
            if self._append_kobo_spans_from_text(node, trailing_text, name):
                self.paragraph_counter[name] += 1
            else:
                # No spans were added: reattach the tail to the last child.
                node[-1].tail = trailing_text

        return node