def set_run_text(run: Element, text: str):
    """Replace the text carried by a run.

    :param run: Run element whose text should be set
    :param text: Text to store in the run

    When ``run_contains_text`` reports text for the run, its first ``w:t``
    element is updated in place. Otherwise a new ``w:t`` element is created,
    filled and appended to the run.
    """
    w_namespace = _namespaces['w']

    if not run_contains_text(run):
        # Build the text element, fill it, and only then attach it to the run.
        new_text_element = run.ownerDocument.createElementNS(w_namespace, 'w:t')
        set_text(new_text_element, text)
        run.appendChild(new_text_element)
        return

    existing_text_elements = run.getElementsByTagNameNS(w_namespace, 't')
    set_text(existing_text_elements[0], text)
def get_text_from_paragraph_runs(node: Element,
                                 list_of_runs_indexes: list = None) -> str:
    """Concatenate the text of the runs found inside a paragraph.

    :param node: Paragraph to extract text from
    :param list_of_runs_indexes: Optional indexes (into the paragraph's runs)
        selecting which runs to read; all runs are read when it is None
    :return: String containing the text of the selected runs, in order

    Runs for which ``run_contains_text`` is false contribute nothing.
    """
    w_namespace = _namespaces['w']

    selected_runs = node.getElementsByTagNameNS(w_namespace, 'r')
    if list_of_runs_indexes is not None:
        selected_runs = [selected_runs[index] for index in list_of_runs_indexes]

    fragments = []
    for current_run in selected_runs:
        if not run_contains_text(current_run):
            continue
        # Only the first w:t element of the run is read.
        first_text_element = current_run.getElementsByTagNameNS(w_namespace, 't')[0]
        fragments.append(first_text_element.firstChild.nodeValue)

    return ''.join(fragments)
def clear_run_text(run: Element):
    """Set the text of a run to the empty string.

    :param run: Run element to clear

    Runs for which ``run_contains_text`` is false are left untouched.
    """
    if not run_contains_text(run):
        return

    first_text_element = run.getElementsByTagNameNS(_namespaces['w'], 't')[0]
    set_text(first_text_element, '')
def get_run_text(run: Element):
    """Return the text of a run.

    :param run: Run element to read
    :return: Result of ``get_text`` on the run's first ``w:t`` element, or
        None when ``run_contains_text`` is false for the run
    """
    if not run_contains_text(run):
        return None

    text_elements = run.getElementsByTagNameNS(_namespaces['w'], 't')
    return get_text(text_elements[0])
def _merge_placeholder_broken_inside_runs_if_required(paragraph: Element):
    """Merge broken runs containing mustache placeholders.

    :param paragraph: paragraph xml element containing broken runs

    A docx document is made up of paragraphs, among other things. A run is a
    part of a paragraph with its own formatting (color, bold...). Microsoft
    Word and LibreOffice Writer frequently split text with the same
    formatting into several runs. If that text contains a mustache
    placeholder, the placeholder ends up spread over several runs and is
    missed by the mustache renderer. For example::

        <w:r>
            <w:rPr>
                <w:b w:val="false"/>
                <w:bCs w:val="false"/>
            </w:rPr>
            <w:t>{{PRODUCTS</w:t>
        </w:r>
        <w:r>
            <w:rPr/>
            <w:t>}}</w:t>
        </w:r>

    Starting at the first run found in the paragraph, the function walks the
    following sibling nodes and counts ``{{`` and ``}}``. Runs seen while the
    counts differ are remembered; as soon as the counts match again, the
    collected text is written to the current (last) run, which keeps its own
    style, and the remembered runs are removed.

    A paragraph without any run is left untouched.
    """
    runs = paragraph.getElementsByTagNameNS(_namespaces['w'], 'r')
    if not runs:
        # Nothing to merge in a paragraph that has no runs.
        return

    pending_runs = []    # runs holding the start of a still-open placeholder
    pending_text = ''    # text gathered since the last balanced point
    open_brace_count = 0
    close_brace_count = 0

    # Iterate over the siblings instead of recursing once per node, so long
    # paragraphs cannot hit the interpreter's recursion limit.
    run = runs[0]
    while run is not None:
        if run_contains_text(run):
            text = get_run_text(run)
            open_brace_count += text.count('{{')
            close_brace_count += text.count('}}')
            pending_text += text

            if open_brace_count != close_brace_count:
                # Placeholder still open: this run will be folded into the
                # run that closes it.
                pending_runs.append(run)
            else:
                if pending_runs:
                    set_run_text(run, pending_text)
                    for stale_run in pending_runs:
                        # getElementsByTagNameNS also returns nested runs, so
                        # the parent is not necessarily `paragraph` itself.
                        stale_run.parentNode.removeChild(stale_run)
                    pending_runs = []
                # Braces are balanced: text gathered so far already lives in
                # a run that stays in the document and must not be copied
                # into a later merge.
                pending_text = ''

        # Only earlier siblings are ever removed, so `run` is still attached
        # and its nextSibling is valid.
        run = run.nextSibling