def _append_text(element: etree.ElementBase, text: Optional[str]) -> None:
    """Add *text* after whatever content *element* already ends with.

    Nothing happens when *text* is ``None`` or empty.  When
    ``_get_last_child_or_none(element)`` returns a child, the text is
    attached to that child's ``tail``; otherwise it is attached to
    ``element.text``.  Any tail/text that is already present is kept and
    the new text is concatenated after it.
    """
    if not text:
        return

    trailing_child = _get_last_child_or_none(element)

    if trailing_child is None:
        # No child to carry a tail, so the text belongs to the element itself.
        current = element.text
        element.text = (current + '' + text) if current else text
        return

    # Text following the last child is stored in that child's tail.
    current = trailing_child.tail
    trailing_child.tail = (current + '' + text) if current else text
def extend_element(
    element: etree.ElementBase,
    children_or_attributes: Iterable[etree.ElementBase]
):
    """Feed a mixed sequence of items into *element*, in order.

    Each item is handled according to its type:

    * ``dict`` - merged into ``element.attrib``.
    * ``str``  - appended to the ``tail`` of the current last child, or to
      ``element.text`` when *element* has no children yet.
    * anything else - appended to *element* as a child.

    NOTE(review): the annotation names only elements, but ``dict`` and
    ``str`` items are accepted as well.
    """
    for entry in children_or_attributes:
        if isinstance(entry, dict):
            element.attrib.update(entry)
        elif isinstance(entry, str):
            if len(element):
                # Text that follows a child lives in that child's tail.
                last_child = element[-1]
                last_child.tail = (last_child.tail or '') + entry
            else:
                element.text = (element.text or '') + entry
        else:
            element.append(entry)
def append_text(element: etree.ElementBase, text: str):
    """Append *text* at the very end of *element*'s content.

    If *element* has at least one child, the text is added to the last
    child's ``tail``; otherwise it is added to ``element.text``.  Existing
    tail/text is extended, and an empty or missing one is simply replaced
    by *text*.
    """
    try:
        last_child = element[-1]
    except IndexError:
        # No children: the text belongs directly to the element.
        element.text = (element.text + text) if element.text else text
    else:
        last_child.tail = (last_child.tail + text) if last_child.tail else text
def _transform(
    self,
    token_generator: str,
    operation: Operation,
    element: etree.ElementBase,
):
    """Replace the content of *element* with its transformed text.

    All text yielded by ``element.itertext()`` is joined into one string.
    If that string is empty the element is left untouched.  Otherwise the
    element is cleared and its ``text`` is set to
    ``transform(value, operation, token_generator)``.

    NOTE(review): ``element.clear()`` presumably also discards child
    elements and attributes - confirm that is intended.
    """
    joined_text = ''.join(element.itertext())
    if joined_text:
        element.clear()
        element.text = transform(joined_text, operation, token_generator)
def replace_notelabel(el: ElementBase, text: str) -> None:
    """Change the label of a footnote.

    Starting from *el*, locate an ``a`` element: while the current element
    is not an ``a``, move to its parent and, if the parent is not an ``a``
    either, look for the first ``a`` among the parent's descendants.  From
    the ``a`` that was found, follow first children down to the innermost
    element and set its ``text`` to *text*.

    NOTE(review): if no ``a`` element is ever found, ``getparent()``
    presumably returns ``None`` at the root and the ``.tag`` access raises
    ``AttributeError`` - confirm callers always pass a node near an anchor.
    """
    anchor = el
    while anchor.tag != 'a':
        anchor = anchor.getparent()
        if anchor.tag == 'a':
            break
        nested = anchor.find('.//a')
        if nested is not None:
            anchor = nested

    # Descend along first children to the innermost element.
    innermost = anchor
    while len(innermost) > 0:
        innermost = innermost[0]
    innermost.text = text
def transform_context(context: etree.ElementBase) -> etree.ElementBase:
    """Rewrite the text around the first child of *context* and return it.

    Three text pieces are tokenized with ``space_tokenize``: ``context.text``
    (before the head), the first child's ``text`` (the head) and the first
    child's ``tail`` (after the head).  The concatenated tokens are passed
    to ``transform_tokens`` and the result is cut back into three parts
    using the original token counts, then written back:

    * ``context.text``  - newline, then each "before" token followed by a space
    * head ``text``     - "head" tokens joined by single spaces
    * head ``tail``     - each "after" token preceded by a space, then newline

    NOTE(review): the slicing presumably relies on ``transform_tokens``
    returning one output token per input token - confirm.
    """
    head_tag = context[0]
    before_tok = space_tokenize(context.text)
    head_tok = space_tokenize(head_tag.text)
    after_tok = space_tokenize(head_tag.tail)

    new_sent = transform_tokens(before_tok + head_tok + after_tok)

    # Boundaries of the head inside the transformed token sequence.
    head_start = len(before_tok)
    head_end = head_start + len(head_tok)
    new_before = new_sent[:head_start]
    new_head = new_sent[head_start:head_end]
    new_after = new_sent[head_end:]

    context.text = "\n" + "".join(tok + " " for tok in new_before)
    head_tag.text = " ".join(new_head)
    head_tag.tail = "".join(" " + tok for tok in new_after) + "\n"
    return context
def _transform(self, operation: Callable, element: etree.ElementBase):
    """Replace the content of *element* with ``operation`` applied to its text.

    All text yielded by ``element.itertext()`` is joined into one string.
    If that string is empty the element is left untouched.  Otherwise the
    element is cleared and its ``text`` is set to ``operation(value)``.

    NOTE(review): ``element.clear()`` presumably also discards child
    elements and attributes - confirm that is intended.
    """
    joined_text = ''.join(element.itertext())
    if joined_text:
        element.clear()
        element.text = operation(joined_text)
def try_fix_namespace_path(node: etree.ElementBase):
    """Swap ``:`` for the OS path separator in *node*'s text, when applicable.

    The node is modified only if ``is_namespace_path(node)`` is truthy; in
    that case every ``:`` in ``node.text`` is replaced with ``os.sep``.
    Otherwise the node is left as it is.
    """
    if not is_namespace_path(node):
        return
    original_text = node.text
    node.text = original_text.replace(':', os.sep)