Пример #1
0
def set_run_text(run: Element, text: str):
    """Write ``text`` into the run's ``w:t`` element, creating one if needed.

    :param run: Run element whose text is to be set
    :param text: New text for the run

    When ``run_contains_text`` reports text for the run, its first ``w:t``
    descendant is updated through ``set_text``. Otherwise a new ``w:t``
    element is created, filled and appended to the run.
    """
    w_namespace = _namespaces['w']

    if not run_contains_text(run):
        # No text element yet: build one from the document owning the run.
        document = run.ownerDocument
        created = document.createElementNS(w_namespace, 'w:t')
        set_text(created, text)
        run.appendChild(created)
        return

    existing = run.getElementsByTagNameNS(w_namespace, 't')[0]
    set_text(existing, text)
Пример #2
0
def get_text_from_paragraph_runs(node: Element, list_of_runs_indexes: list = None) -> str:
    """Concatenate the text of the runs found inside a paragraph.

    :param node: Paragraph to extract text from
    :param list_of_runs_indexes: Optional indexes into the paragraph's runs;
        when given, only those runs are read, in the order listed
    :return: Text of the selected runs joined into one string

    Runs for which ``run_contains_text`` is false are skipped.
    """
    w_namespace = _namespaces['w']
    candidates = node.getElementsByTagNameNS(w_namespace, 'r')

    if list_of_runs_indexes is not None:
        candidates = [candidates[index] for index in list_of_runs_indexes]

    pieces = []
    for candidate in candidates:
        if not run_contains_text(candidate):
            continue
        # Only the first text element of each run is read.
        first_text = candidate.getElementsByTagNameNS(w_namespace, 't')[0]
        pieces.append(first_text.firstChild.nodeValue)
    return ''.join(pieces)
Пример #3
0
def clear_run_text(run: Element):
    """Set the text of the run's first ``w:t`` element to an empty string.

    :param run: Run element to clear

    Does nothing when ``run_contains_text`` reports no text for the run.
    """
    if not run_contains_text(run):
        return

    text_elements = run.getElementsByTagNameNS(_namespaces['w'], 't')
    set_text(text_elements[0], '')
Пример #4
0
def get_run_text(run: Element):
    """Return the text of the run's first ``w:t`` element.

    :param run: Run element to read
    :return: Result of ``get_text`` on the first ``w:t`` element, or ``None``
        when ``run_contains_text`` reports no text for the run
    """
    if not run_contains_text(run):
        return None

    first_text = run.getElementsByTagNameNS(_namespaces['w'], 't')[0]
    return get_text(first_text)
Пример #5
0
def _merge_placeholder_broken_inside_runs_if_required(paragraph: Element):
    """Merge broken runs containing mustache placeholders.

    :param paragraph: paragraph xml element containing broken runs

    docx document is made up of paragraph among other things. A run is a part
    of a paragraph with different formatting(color, bold...). But most times
    Microsoft Word and LibreOffice Writer split up text with same formatting
    into different runs. If this text contains mustache placeholders, it will
    be missed by mustache renderer. For example

    <w:r>
        <w:rPr>
            <w:b w:val="false"/>
            <w:bCs w:val="false"/>
        </w:rPr>
        <w:t>{{PRODUCTS</w:t>
    </w:r>
    <w:r>
        <w:rPr/>
        <w:t>}}</w:t>
    </w:r>

    This function walks the first run and its following siblings. Runs are
    collected while the number of ``{{`` seen differs from the number of
    ``}}``; once they match, the collected text is written into the run that
    closed the placeholder and the collected runs are removed. A paragraph
    without runs is left untouched.
    """
    runs = paragraph.getElementsByTagNameNS(_namespaces['w'], 'r')
    if not runs:
        # Nothing to merge; indexing runs[0] would raise IndexError.
        return

    pending_runs = []   # runs holding the start of an unfinished placeholder
    pending_text = ''   # text gathered since the braces were last balanced
    open_brace_count = 0
    close_brace_count = 0

    # Iterate instead of recursing so that long paragraphs cannot exhaust
    # the interpreter's recursion limit.
    node = runs[0]
    while node is not None:
        following = node.nextSibling

        if run_contains_text(node):
            text = get_run_text(node)
            open_brace_count += text.count('{{')
            close_brace_count += text.count('}}')
            pending_text += text

            if open_brace_count != close_brace_count:
                # Placeholder still open: keep this run for a later merge.
                pending_runs.append(node)
            else:
                if pending_runs:
                    # The current run closes the placeholder: it receives
                    # the merged text and the earlier fragments go away.
                    set_run_text(node, pending_text)
                    for stale in pending_runs:
                        # Remove through the actual parent; the runs may not
                        # be direct children of ``paragraph``.
                        stale.parentNode.removeChild(stale)
                    pending_runs = []
                # Always restart the buffer once braces are balanced, so
                # text of already complete runs is never copied into a
                # later merged run.
                pending_text = ''

        node = following
Пример #6
0
def _merge_placeholder_broken_inside_runs_if_required(paragraph: Element):
    """Merge broken runs containing mustache placeholders.

    :param paragraph: paragraph xml element containing broken runs

    docx document is made up of paragraph among other things. A run is a part
    of a paragraph with different formatting(color, bold...). But most times
    Microsoft Word and LibreOffice Writer split up text with same formatting
    into different runs. If this text contains mustache placeholders, it will
    be missed by mustache renderer. For example

    <w:r>
        <w:rPr>
            <w:b w:val="false"/>
            <w:bCs w:val="false"/>
        </w:rPr>
        <w:t>{{PRODUCTS</w:t>
    </w:r>
    <w:r>
        <w:rPr/>
        <w:t>}}</w:t>
    </w:r>

    Starting at the first run, every following sibling is visited. While the
    count of ``{{`` differs from the count of ``}}`` the visited runs are
    remembered; the run that balances the braces receives the combined text
    and the remembered runs are removed. A paragraph without runs is left
    untouched.
    """
    runs = paragraph.getElementsByTagNameNS(_namespaces['w'], 'r')
    if not runs:
        # Nothing to merge; indexing runs[0] would raise IndexError.
        return

    runs_to_merge = []      # fragments of a placeholder that is still open
    text_to_replace = ''    # text gathered since the braces last balanced
    open_brace_count = 0
    close_brace_count = 0

    # A loop replaces the former self-recursion (one frame per sibling),
    # which could hit the recursion limit on long paragraphs.
    current = runs[0]
    while current is not None:
        upcoming = current.nextSibling

        if run_contains_text(current):
            text = get_run_text(current)
            open_brace_count += text.count('{{')
            close_brace_count += text.count('}}')
            text_to_replace += text

            if open_brace_count != close_brace_count:
                # Placeholder not closed yet: remember this fragment.
                runs_to_merge.append(current)
            else:
                if runs_to_merge:
                    # Braces match again: the current run keeps its style
                    # and takes over the text of all earlier fragments.
                    set_run_text(current, text_to_replace)
                    for fragment in runs_to_merge:
                        # Detach via the real parent, which is not
                        # necessarily ``paragraph`` for nested runs.
                        fragment.parentNode.removeChild(fragment)
                    runs_to_merge = []
                # Reset on every balanced run; otherwise text of complete
                # runs would be duplicated into the next merged run.
                text_to_replace = ''

        current = upcoming