Пример #1
0
    def convert_run(self, run):
        """Convert one run into an HTML element and return it.

        The children of *run* are visited in order. Plain text is collected
        in a ``Text`` buffer, while breaks, the elements yielded by
        ``self.images.to_html``, note references and whitespace-sensitive
        text are added as child elements of a fresh ``<span>``. The span is
        registered in ``self.object_map`` against *run*.

        :param run: element whose children are converted
        :return: the element built for the run; its tag is switched to
            ``sub``/``sup`` when the resolved run style requests it
        """
        span = SPAN()
        self.object_map[span] = run
        sink = Text(span, 'text', [])

        def emit(elem):
            # Hand the element to the text sink, then attach whatever the
            # sink now exposes as its current element to the span.
            sink.add_elem(elem)
            span.append(sink.elem)

        for node in run:
            if is_tag(node, 'w:t'):
                content = node.text
                if not content:
                    continue
                needs_pre = False
                if node.get(XML('space'), None) == 'preserve':
                    # A dedicated pre-wrap <span> is only worth emitting when
                    # the text really contains runs of spaces, newlines or
                    # tabs (as decided by the two patterns).
                    needs_pre = (
                        self.ms_pat.search(content) is not None or
                        self.ws_pat.search(content) is not None)
                if needs_pre:
                    emit(SPAN(content, style="white-space:pre-wrap"))
                else:
                    sink.buf.append(content)
            elif is_tag(node, 'w:cr'):
                emit(BR())
            elif is_tag(node, 'w:br'):
                if get(node, 'w:type') in {'column', 'page'}:
                    br = BR(style='page-break-after:always')
                else:
                    side = node.get('clear', None)
                    if side == 'all':
                        br = BR(style='clear:both')
                    elif side in {'left', 'right'}:
                        br = BR(style='clear:%s' % side)
                    else:
                        br = BR()
                emit(br)
            elif any(is_tag(node, name) for name in ('w:drawing', 'w:pict')):
                for img in self.images.to_html(
                        node, self.current_page, self.docx, self.dest_dir):
                    emit(img)
            elif any(is_tag(node, name) for name in
                     ('w:footnoteReference', 'w:endnoteReference')):
                anchor, name = self.footnotes.get_ref(node)
                if anchor and name:
                    link = A(name, href='#' + anchor, title=name)
                    ref = SUP(link, id='back_%s' % anchor)
                    ref.set('class', 'noteref')
                    emit(ref)
            elif (is_tag(node, 'w:fldChar') and
                  get(node, 'w:fldCharType') == 'separate'):
                # A field separator contributes a single U+00A0 character.
                sink.buf.append('\xa0')

        # Flush text that is still buffered onto the current element.
        if sink.buf:
            setattr(sink.elem, sink.attr, ''.join(sink.buf))

        style = self.styles.resolve_run(run)
        new_tag = {'subscript': 'sub', 'superscript': 'sup'}.get(style.vert_align)
        if new_tag is not None:
            span.tag = new_tag
        if style.lang is not inherit:
            # NOTE(review): plain attribute assignment rather than
            # span.set('lang', ...) -- confirm this reaches the output markup.
            span.lang = style.lang
        return span
Пример #2
0
    def convert_run(self, run):
        """Convert one run into an HTML ``<span>`` (or ``<sub>``/``<sup>``).

        Children of *run* are processed in order: text is buffered in a
        ``Text`` helper, and breaks, images and note references are added as
        child elements. The resulting element is recorded in
        ``self.object_map`` keyed to *run*.

        :param run: element whose children are converted
        :return: the element built for the run
        """
        ans = SPAN()
        self.object_map[ans] = run
        text = Text(ans, "text", [])

        for child in run:
            if is_tag(child, "w:t"):
                if not child.text:
                    continue
                space = child.get(XML("space"), None)
                if space == "preserve":
                    # The CSS property is spelled ``white-space``; the
                    # unhyphenated form is not a CSS property, so the
                    # whitespace this branch exists to keep would be lost.
                    text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
                    ans.append(text.elem)
                else:
                    text.buf.append(child.text)
            elif is_tag(child, "w:cr"):
                text.add_elem(BR())
                ans.append(text.elem)
            elif is_tag(child, "w:br"):
                # NOTE(review): looked up without a namespace; verify that
                # break types in the input are stored as a plain ``type``
                # attribute rather than a namespaced one.
                typ = child.get("type", None)
                if typ in {"column", "page"}:
                    br = BR(style="page-break-after:always")
                else:
                    clear = child.get("clear", None)
                    if clear in {"all", "left", "right"}:
                        # "all" maps to the CSS keyword "both".
                        br = BR(style="clear:%s" % ("both" if clear == "all" else clear))
                    else:
                        br = BR()
                text.add_elem(br)
                ans.append(text.elem)
            elif is_tag(child, "w:drawing") or is_tag(child, "w:pict"):
                for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                    text.add_elem(img)
                    ans.append(text.elem)
            elif is_tag(child, "w:footnoteReference") or is_tag(child, "w:endnoteReference"):
                anchor, name = self.footnotes.get_ref(child)
                if anchor and name:
                    # Superscripted link to the note; the id "back_<anchor>"
                    # is set on the <sup> wrapper.
                    l = SUP(A(name, href="#" + anchor, title=name), id="back_%s" % anchor)
                    l.set("class", "noteref")
                    text.add_elem(l)
                    ans.append(text.elem)
        # Flush text that is still buffered onto the current element.
        if text.buf:
            setattr(text.elem, text.attr, "".join(text.buf))

        style = self.styles.resolve_run(run)
        if style.vert_align in {"superscript", "subscript"}:
            ans.tag = "sub" if style.vert_align == "subscript" else "sup"
        if style.lang is not inherit:
            # NOTE(review): plain attribute assignment rather than
            # ans.set("lang", ...) -- confirm this reaches the output markup.
            ans.lang = style.lang
        return ans
Пример #3
0
        def commit(current_run):
            """Move the collected list items into a newly created list element.

            The new ``<ol>``/``<ul>`` is inserted where the first item sits in
            its parent, every item of *current_run* is re-parented into it,
            and *current_run* is emptied in place. An empty run is ignored.
            """
            if not current_run:
                return
            first = current_run[0]
            container = first.getparent()
            position = container.index(first)

            instance = self.instances[first.get('list-id')]
            level = instance.levels[int(first.get('list-lvl'))]
            lvlid = first.get('list-id') + first.get('list-lvl')
            has_template = 'list-template' in first.attrib
            if level.is_numbered or has_template:
                wrap = OL('\n\t')
            else:
                wrap = UL('\n\t')
            if has_template:
                wrap.set('lvlid', lvlid)
            else:
                list_css = level.css(images, self.pic_map, self.rid_map)
                wrap.set('class', styles.register(list_css, 'list'))
            ccss = level.char_css()
            if ccss:
                ccss = styles.register(ccss, 'bullet')
            container.insert(position, wrap)

            previous = None
            for item in current_run:
                wrap.append(item)
                item.tail = '\n\t'
                if has_template:
                    # Push the item's own content down into an inner span ...
                    body = SPAN()
                    body.text = item.text
                    item.text = None
                    for grandchild in item:
                        body.append(grandchild)
                    item.append(body)
                    # ... and put a span holding the value of its
                    # list-template attribute in front of it.
                    marker = SPAN(item.get('list-template'))
                    if ccss:
                        marker.set('class', ccss)
                    # Remember the longest template text seen for this level.
                    longest = templates.get(lvlid, '')
                    if marker.text and len(marker.text) > len(longest):
                        templates[lvlid] = marker.text
                    item.insert(0, marker)
                for name in ('list-lvl', 'list-id', 'list-template'):
                    item.attrib.pop(name, None)
                number = int(item.get('value'))
                # An explicit value is dropped for <ul> items and for items
                # that simply continue the previous item's numbering.
                if wrap.tag == 'ul' or previous == number - 1:
                    item.attrib.pop('value')
                previous = number
            current_run[-1].tail = '\n'
            del current_run[:]
Пример #4
0
    def convert_run(self, run):
        """Build the HTML element that represents a single run.

        Each child of *run* is handled in turn: text is buffered through a
        ``Text`` helper, while breaks, images and footnote/endnote references
        become child elements. The result is stored in ``self.object_map``
        under *run* and returned.

        :param run: element whose children are converted
        :return: the element built for the run; its tag becomes ``sub`` or
            ``sup`` when the resolved style asks for vertical alignment
        """
        ans = SPAN()
        self.object_map[ans] = run
        text = Text(ans, 'text', [])

        for child in run:
            if is_tag(child, 'w:t'):
                if not child.text:
                    continue
                space = child.get(XML('space'), None)
                if space == 'preserve':
                    # Must be ``white-space`` (hyphenated): ``whitespace`` is
                    # not a CSS property, so with it the preserved spacing
                    # would not actually be kept.
                    text.add_elem(SPAN(child.text, style="white-space:pre-wrap"))
                    ans.append(text.elem)
                else:
                    text.buf.append(child.text)
            elif is_tag(child, 'w:cr'):
                text.add_elem(BR())
                ans.append(text.elem)
            elif is_tag(child, 'w:br'):
                # NOTE(review): attribute is read without a namespace; confirm
                # the input really carries a plain ``type`` attribute here.
                typ = child.get('type', None)
                if typ in {'column', 'page'}:
                    br = BR(style='page-break-after:always')
                else:
                    clear = child.get('clear', None)
                    if clear in {'all', 'left', 'right'}:
                        # 'all' is expressed as the CSS keyword 'both'.
                        br = BR(style='clear:%s'%('both' if clear == 'all' else clear))
                    else:
                        br = BR()
                text.add_elem(br)
                ans.append(text.elem)
            elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'):
                for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
                    text.add_elem(img)
                    ans.append(text.elem)
            elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
                anchor, name = self.footnotes.get_ref(child)
                if anchor and name:
                    # <sup id="back_..."><a href="#...">name</a></sup>
                    l = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor)
                    l.set('class', 'noteref')
                    text.add_elem(l)
                    ans.append(text.elem)
        # Write out any text left in the buffer.
        if text.buf:
            setattr(text.elem, text.attr, ''.join(text.buf))

        style = self.styles.resolve_run(run)
        if style.vert_align in {'superscript', 'subscript'}:
            ans.tag = 'sub' if style.vert_align == 'subscript' else 'sup'
        if style.lang is not inherit:
            # NOTE(review): plain attribute assignment rather than
            # ans.set('lang', ...) -- confirm this reaches the output markup.
            ans.lang = style.lang
        return ans
Пример #5
0
        def commit(current_run):
            """Wrap the pending list items in one list element and reset the run.

            Does nothing for an empty run. Otherwise a new ``<ol>`` or
            ``<ul>`` takes the place of the first item in its parent, all
            items are moved inside it, and *current_run* is cleared in place.
            """
            if not current_run:
                return
            head = current_run[0]
            owner = head.getparent()
            insert_at = owner.index(head)

            definition = self.instances[head.get('list-id')]
            level_index = int(head.get('list-lvl'))
            level = definition.levels[level_index]
            lvlid = head.get('list-id') + head.get('list-lvl')
            has_template = 'list-template' in head.attrib
            make_list = OL if (level.is_numbered or has_template) else UL
            wrap = make_list('\n\t')
            if has_template:
                wrap.set('lvlid', lvlid)
            else:
                css_class = styles.register(
                    level.css(images, self.pic_map, self.rid_map), 'list')
                wrap.set('class', css_class)
            ccss = level.char_css()
            if ccss:
                ccss = styles.register(ccss, 'bullet')
            owner.insert(insert_at, wrap)

            prev_val = None
            for li in current_run:
                wrap.append(li)
                li.tail = '\n\t'
                if has_template:
                    # Existing text and children go into a trailing span.
                    content = SPAN()
                    content.text = li.text
                    li.text = None
                    for sub in li:
                        content.append(sub)
                    li.append(content)
                    # The list-template attribute value goes into a leading
                    # span, styled with the bullet class when there is one.
                    label = SPAN(li.get('list-template'))
                    if ccss:
                        label.set('class', ccss)
                    known = templates.get(lvlid, '')
                    if label.text and len(label.text) > len(known):
                        # Keep the longest template text per level id.
                        templates[lvlid] = label.text
                    li.insert(0, label)
                for key in ('list-lvl', 'list-id', 'list-template'):
                    li.attrib.pop(key, None)
                val = int(li.get('value'))
                # The explicit value is dropped for <ul> items, for a first
                # item numbered 1, and for items that directly follow the
                # previous number.
                continues_numbering = prev_val == val - 1
                starts_at_one = prev_val is None and val == 1
                if continues_numbering or wrap.tag == 'ul' or starts_at_one:
                    li.attrib.pop('value')
                prev_val = val
            current_run[-1].tail = '\n'
            del current_run[:]
Пример #6
0
        def commit(current_run):
            """Turn the accumulated list items into a single HTML list.

            A new list element is inserted at the position of the first item,
            the items are moved into it, and *current_run* is emptied in
            place. Returns immediately when there is nothing to commit.
            """
            if not current_run:
                return
            lead = current_run[0]
            holder = lead.getparent()
            slot = holder.index(lead)

            inst = self.instances[lead.get("list-id")]
            level = inst.levels[int(lead.get("list-lvl"))]
            lvlid = lead.get("list-id") + lead.get("list-lvl")
            has_template = "list-template" in lead.attrib
            if level.is_numbered or has_template:
                wrap = OL("\n\t")
            else:
                wrap = UL("\n\t")
            if not has_template:
                registered = styles.register(
                    level.css(images, self.pic_map, self.rid_map), "list")
                wrap.set("class", registered)
            else:
                wrap.set("lvlid", lvlid)
            ccss = level.char_css()
            if ccss:
                ccss = styles.register(ccss, "bullet")
            holder.insert(slot, wrap)

            last_seen = None
            for entry in current_run:
                wrap.append(entry)
                entry.tail = "\n\t"
                if has_template:
                    # Relocate the entry's text and children into a new span
                    # appended at the end of the entry.
                    inner = SPAN()
                    inner.text = entry.text
                    entry.text = None
                    for node in entry:
                        inner.append(node)
                    entry.append(inner)
                    # Prepend a span carrying the list-template attribute.
                    prefix = SPAN(entry.get("list-template"))
                    if ccss:
                        prefix.set("class", ccss)
                    recorded = templates.get(lvlid, "")
                    if prefix.text and len(prefix.text) > len(recorded):
                        # Track the longest template text for this level id.
                        templates[lvlid] = prefix.text
                    entry.insert(0, prefix)
                for attr_name in ("list-lvl", "list-id", "list-template"):
                    entry.attrib.pop(attr_name, None)
                current = int(entry.get("value"))
                # No explicit value for <ul> entries or for entries that
                # follow on directly from the previous number.
                if wrap.tag == "ul" or last_seen == current - 1:
                    entry.attrib.pop("value")
                last_seen = current
            current_run[-1].tail = "\n"
            del current_run[:]
Пример #7
0
        def commit(current_run):
            """Gather the queued list items under one ``<ol>``/``<ul>`` element.

            The list element is placed where the first item currently sits,
            each item is moved into it, and *current_run* is cleared in
            place. An empty run is a no-op.
            """
            if not current_run:
                return
            first = current_run[0]
            parent = first.getparent()
            where = parent.index(first)

            instance = self.instances[first.get('list-id')]
            level = instance.levels[int(first.get('list-lvl'))]
            lvlid = first.get('list-id') + first.get('list-lvl')
            if level.is_numbered:
                wrap = OL('\n\t')
            else:
                wrap = UL('\n\t')
            has_template = 'list-template' in first.attrib
            if has_template:
                wrap.set('lvlid', lvlid)
            else:
                rule = {'list-style-type': level.fmt}
                wrap.set('class', styles.register(rule, 'list'))
            parent.insert(where, wrap)

            prior = None
            for item in current_run:
                wrap.append(item)
                item.tail = '\n\t'
                if has_template:
                    # The item's text and children move into a trailing span.
                    payload = SPAN()
                    payload.text = item.text
                    item.text = None
                    for piece in item:
                        payload.append(piece)
                    item.append(payload)
                    # A leading span holds the list-template attribute value.
                    heading = SPAN(item.get('list-template'))
                    widest = templates.get(lvlid, '')
                    if heading.text and len(heading.text) > len(widest):
                        # Keep the longest template text per level id.
                        templates[lvlid] = heading.text
                    item.insert(0, heading)
                for scratch in ('list-lvl', 'list-id', 'list-template'):
                    item.attrib.pop(scratch, None)
                number = int(item.get('value'))
                # The value attribute is dropped inside a <ul> and when it
                # is exactly one more than the previous item's value.
                if wrap.tag == 'ul' or prior == number - 1:
                    item.attrib.pop('value')
                prior = number
            current_run[-1].tail = '\n'
            del current_run[:]
Пример #8
0
    def convert_run(self, run):
        """Convert one run into an HTML element and return it.

        Children of *run* are handled in order. Text, hyphen characters and
        flushed buffers end up as text/tail of the generated elements;
        breaks, images, note references and tabs become child elements of a
        new ``<span>`` that is registered in ``self.object_map``. After the
        walk, the resolved run style may change the tag to ``sub``/``sup``,
        add ``lang``/``dir`` attributes, and remap the text when the font is
        a symbol font.

        :param run: element whose children are converted
        :return: the element built for the run
        """
        tag_is = self.namespace.is_tag
        span = SPAN()
        self.object_map[span] = run
        sink = Text(span, 'text', [])

        def emit(elem):
            # Register the element with the text sink and attach the sink's
            # current element to the span.
            sink.add_elem(elem)
            span.append(sink.elem)

        for node in run:
            if tag_is(node, 'w:t'):
                if not node.text:
                    continue
                ctext = node.text
                if node.get(XML('space'), None) != 'preserve':
                    # Remove leading and trailing whitespace. Word ignores
                    # leading and trailing whitespace without preserve
                    ctext = ctext.strip(' \n\r\t')
                # A pre-wrap <span> is only used when the text needs it,
                # i.e. when one of the whitespace patterns matches.
                needs_pre = (self.ms_pat.search(ctext) is not None or
                             self.ws_pat.search(ctext) is not None)
                if needs_pre:
                    emit(SPAN(ctext, style="white-space:pre-wrap"))
                else:
                    sink.buf.append(ctext)
            elif tag_is(node, 'w:cr'):
                emit(BR())
            elif tag_is(node, 'w:br'):
                if self.namespace.get(node, 'w:type') in {'column', 'page'}:
                    br = BR(style='page-break-after:always')
                else:
                    side = node.get('clear', None)
                    if side == 'all':
                        br = BR(style='clear:both')
                    elif side in {'left', 'right'}:
                        br = BR(style='clear:%s' % side)
                    else:
                        br = BR()
                emit(br)
            elif tag_is(node, 'w:drawing') or tag_is(node, 'w:pict'):
                for img in self.images.to_html(
                        node, self.current_page, self.docx, self.dest_dir):
                    emit(img)
            elif tag_is(node, 'w:footnoteReference') or tag_is(node, 'w:endnoteReference'):
                anchor, name = self.footnotes.get_ref(node)
                if anchor and name:
                    marker = SUP(name, id='back_%s' % anchor)
                    link = A(marker, href='#' + anchor, title=name)
                    link.set('class', 'noteref')
                    emit(link)
            elif tag_is(node, 'w:tab'):
                # A tab becomes a span of repeated NBSP; the count is derived
                # from the default tab stop setting.
                count = int(math.ceil((self.settings.default_tab_stop / 36) * 6))
                emit(SPAN(NBSP * count))
                span[-1].set('class', 'tab')
            elif tag_is(node, 'w:noBreakHyphen'):
                sink.buf.append('\u2011')
            elif tag_is(node, 'w:softHyphen'):
                sink.buf.append('\u00ad')

        # Flush text that is still buffered onto the current element.
        if sink.buf:
            setattr(sink.elem, sink.attr, ''.join(sink.buf))

        style = self.styles.resolve_run(run)
        new_tag = {'subscript': 'sub', 'superscript': 'sup'}.get(style.vert_align)
        if new_tag is not None:
            span.tag = new_tag
        if style.lang is not inherit:
            lang = html_lang(style.lang)
            # Only emit a lang attribute that differs from the document's.
            if lang is not None and lang != self.doc_lang:
                span.set('lang', lang)
        if style.rtl is True:
            span.set('dir', 'rtl')
        family = style.font_family
        if is_symbol_font(family):
            # Remap text and tail of every element tracked by the sink, then
            # replace the font family on the resolved style.
            for elem in sink:
                if elem.text:
                    elem.text = map_symbol_text(elem.text, family)
                if elem.tail:
                    elem.tail = map_symbol_text(elem.tail, family)
            style.font_family = 'sans-serif'
        return span
Пример #9
0
    def convert_run(self, run):
        """Produce the HTML element for one run.

        Every child of *run* is dispatched on its tag: text is buffered via a
        ``Text`` helper, and breaks, images, note references and tabs are
        appended as child elements of a new ``<span>``. The span is recorded
        in ``self.object_map`` under *run*.

        :param run: element whose children are converted
        :return: the element built for the run; its tag is changed to
            ``sub``/``sup`` when the resolved style requests it
        """
        out = SPAN()
        self.object_map[out] = run
        collector = Text(out, 'text', [])

        for node in run:
            if is_tag(node, 'w:t'):
                raw = node.text
                if not raw:
                    continue
                keep = False
                if node.get(XML('space'), None) == 'preserve':
                    # Use a pre-wrap <span> only if the text really needs
                    # one, as decided by the two whitespace patterns.
                    keep = (self.ms_pat.search(raw) is not None or
                            self.ws_pat.search(raw) is not None)
                if keep:
                    collector.add_elem(SPAN(raw, style="white-space:pre-wrap"))
                    out.append(collector.elem)
                else:
                    collector.buf.append(raw)
                continue

            if is_tag(node, 'w:cr'):
                collector.add_elem(BR())
                out.append(collector.elem)
                continue

            if is_tag(node, 'w:br'):
                if get(node, 'w:type') in {'column', 'page'}:
                    br = BR(style='page-break-after:always')
                else:
                    clear = node.get('clear', None)
                    if clear in {'all', 'left', 'right'}:
                        # 'all' is written as the CSS keyword 'both'.
                        css_value = 'both' if clear == 'all' else clear
                        br = BR(style='clear:%s' % css_value)
                    else:
                        br = BR()
                collector.add_elem(br)
                out.append(collector.elem)
                continue

            if is_tag(node, 'w:drawing') or is_tag(node, 'w:pict'):
                images = self.images.to_html(
                    node, self.current_page, self.docx, self.dest_dir)
                for img in images:
                    collector.add_elem(img)
                    out.append(collector.elem)
                continue

            if is_tag(node, 'w:footnoteReference') or is_tag(node, 'w:endnoteReference'):
                anchor, name = self.footnotes.get_ref(node)
                if anchor and name:
                    link = A(name, href='#' + anchor, title=name)
                    ref = SUP(link, id='back_%s' % anchor)
                    ref.set('class', 'noteref')
                    collector.add_elem(ref)
                    out.append(collector.elem)
                continue

            if is_tag(node, 'w:tab'):
                # A tab becomes a span of repeated NBSP; the count is derived
                # from the default tab stop setting.
                width = int(math.ceil((self.settings.default_tab_stop / 36) * 6))
                collector.add_elem(SPAN(NBSP * width))
                out.append(collector.elem)
                out[-1].set('class', 'tab')

        # Flush text that is still buffered onto the current element.
        if collector.buf:
            setattr(collector.elem, collector.attr, ''.join(collector.buf))

        style = self.styles.resolve_run(run)
        new_tag = {'subscript': 'sub', 'superscript': 'sup'}.get(style.vert_align)
        if new_tag is not None:
            out.tag = new_tag
        if style.lang is not inherit:
            # NOTE(review): plain attribute assignment rather than
            # out.set('lang', ...) -- confirm this reaches the output markup.
            out.lang = style.lang
        return out
Пример #10
0
    def convert_run(self, run):
        """Translate a single run into its HTML element.

        Walks the children of *run* in order. Text and hyphen characters are
        buffered through a ``Text`` helper; breaks, images, note references
        and tabs are inserted as child elements of a new ``<span>``, which is
        also stored in ``self.object_map`` under *run*.

        :param run: element whose children are converted
        :return: the element built for the run, possibly retagged as
            ``sub``/``sup`` and given a ``lang`` attribute
        """
        result = SPAN()
        self.object_map[result] = run
        buf = Text(result, 'text', [])

        def push(elem):
            # Register the element with the text helper and attach the
            # helper's current element to the result.
            buf.add_elem(elem)
            result.append(buf.elem)

        for node in run:
            if is_tag(node, 'w:t'):
                if not node.text:
                    continue
                ctext = node.text
                if node.get(XML('space'), None) != 'preserve':
                    # Remove leading and trailing whitespace. Word ignores
                    # leading and trailing whitespace without preserve
                    ctext = ctext.strip(' \n\r\t')
                # Emit a pre-wrap <span> only when one of the whitespace
                # patterns says the text actually needs it.
                if (self.ms_pat.search(ctext) is not None or
                        self.ws_pat.search(ctext) is not None):
                    push(SPAN(ctext, style="white-space:pre-wrap"))
                else:
                    buf.buf.append(ctext)
            elif is_tag(node, 'w:cr'):
                push(BR())
            elif is_tag(node, 'w:br'):
                kind = get(node, 'w:type')
                clear = None if kind in {'column', 'page'} else node.get('clear', None)
                if kind in {'column', 'page'}:
                    br = BR(style='page-break-after:always')
                elif clear == 'all':
                    br = BR(style='clear:both')
                elif clear in {'left', 'right'}:
                    br = BR(style='clear:%s' % clear)
                else:
                    br = BR()
                push(br)
            elif is_tag(node, 'w:drawing') or is_tag(node, 'w:pict'):
                for img in self.images.to_html(
                        node, self.current_page, self.docx, self.dest_dir):
                    push(img)
            elif is_tag(node, 'w:footnoteReference') or is_tag(node, 'w:endnoteReference'):
                anchor, name = self.footnotes.get_ref(node)
                if anchor and name:
                    link = A(name, href='#' + anchor, title=name)
                    ref = SUP(link, id='back_%s' % anchor)
                    ref.set('class', 'noteref')
                    push(ref)
            elif is_tag(node, 'w:tab'):
                # A tab becomes a span of repeated NBSP; the count is derived
                # from the default tab stop setting.
                count = int(math.ceil((self.settings.default_tab_stop / 36) * 6))
                push(SPAN(NBSP * count))
                result[-1].set('class', 'tab')
            elif is_tag(node, 'w:noBreakHyphen'):
                buf.buf.append(u'\u2011')
            elif is_tag(node, 'w:softHyphen'):
                buf.buf.append(u'\u00ad')

        # Flush text that is still buffered onto the current element.
        if buf.buf:
            setattr(buf.elem, buf.attr, ''.join(buf.buf))

        style = self.styles.resolve_run(run)
        new_tag = {'subscript': 'sub', 'superscript': 'sup'}.get(style.vert_align)
        if new_tag is not None:
            result.tag = new_tag
        if style.lang is not inherit:
            lang = html_lang(style.lang)
            # Only emit a lang attribute that differs from the document's.
            if lang is not None and lang != self.doc_lang:
                result.set('lang', lang)
        return result