Python ImagesManager примеры использования

Язык программирования: Python

Пространство имен/Пакет: calibre.ebooks.docx.writer.images

Класс/Тип: ImagesManager

Примеров на hotexamples.com: 13

Python ImagesManager - 13 примеров найдено. Это лучшие примеры Python кода для calibre.ebooks.docx.writer.images.ImagesManager, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

ImagesManager(3)

add_image(3)

serialize(3)

create_cover_markup(1)

read_image(1)

write_cover_block(1)

Пример #1

Показать файл

    def __call__(self):
        """Run the whole conversion.

        Runs the SVGRasterizer transform over the book, creates the style,
        image, list and font managers plus the block container, processes
        every spine item, deletes the blocks that report themselves as
        skipped, finalizes lists and styles and finally calls ``write()``.
        """
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        self.svg_rasterizer = SVGRasterizer()
        self.svg_rasterizer(self.oeb, self.opts)

        docx = self.docx
        self.styles_manager = StylesManager(docx.namespace)
        self.images_manager = ImagesManager(
            self.oeb, docx.document_relationships)
        self.lists_manager = ListsManager(docx)
        self.fonts_manager = FontsManager(docx.namespace, self.oeb, self.opts)
        self.blocks = Blocks(docx.namespace, self.styles_manager)

        for spine_item in self.oeb.spine:
            self.process_item(spine_item)

        all_blocks = self.blocks.all_blocks
        skipped_positions = []
        for index, current in enumerate(all_blocks):
            if index + 1 >= len(all_blocks):
                # The final block has no successor to resolve against
                break
            current.resolve_skipped(all_blocks[index + 1])
            if current.skipped:
                skipped_positions.append(index)
        # Positions are deleted from the highest to the lowest
        for index in reversed(skipped_positions):
            self.blocks.delete_block_at(index)

        self.lists_manager.finalize(all_blocks)
        self.styles_manager.finalize(all_blocks)
        self.write()

Пример #2

Показать файл

Файл: from_html.py Проект: smdx023/calibre

    def __call__(self):
        """Run the whole conversion.

        Steps, in order: run the SVGRasterizer transform, create the
        managers and the block container, process every spine item,
        optionally process TOC links and read the cover image, delete
        skipped blocks, apply page breaks and languages, build the cover
        markup, finalize lists and styles, then call ``write()``.
        """
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        self.svg_rasterizer = SVGRasterizer(base_css=self.base_css)
        self.svg_rasterizer(self.oeb, self.opts)

        docx = self.docx
        self.styles_manager = StylesManager(
            docx.namespace, self.log, self.mi.language)
        self.links_manager = LinksManager(
            docx.namespace, docx.document_relationships, self.log)
        self.images_manager = ImagesManager(
            self.oeb, docx.document_relationships, self.opts)
        self.lists_manager = ListsManager(docx)
        self.fonts_manager = FontsManager(docx.namespace, self.oeb, self.opts)
        self.blocks = Blocks(
            docx.namespace, self.styles_manager, self.links_manager)
        self.current_link = self.current_lang = None

        for spine_item in self.oeb.spine:
            self.log.debug('Processing', spine_item.href)
            self.process_item(spine_item)
        if self.add_toc:
            self.links_manager.process_toc_links(self.oeb)

        # The cover is only read when it was requested, the metadata names
        # one, and that id exists in the manifest
        if self.add_cover and self.oeb.metadata.cover:
            cover_id = str(self.oeb.metadata.cover[0])
            if cover_id in self.oeb.manifest.ids:
                cover_item = self.oeb.manifest.ids[cover_id]
                self.cover_img = self.images_manager.read_image(
                    cover_item.href)

        all_blocks = self.blocks.all_blocks
        skipped_positions = []
        for index, current in enumerate(all_blocks):
            if index + 1 >= len(all_blocks):
                # The final block has no successor to resolve against
                break
            current.resolve_skipped(all_blocks[index + 1])
            if current.skipped:
                skipped_positions.append(index)
        # Positions are deleted from the highest to the lowest
        for index in reversed(skipped_positions):
            self.blocks.delete_block_at(index)
        self.blocks.all_blocks[0].is_first_block = True
        self.blocks.apply_page_break_after()
        self.blocks.resolve_language()

        if self.cover_img is not None:
            # Replace the image read above with its cover markup
            self.cover_img = self.images_manager.create_cover_markup(
                self.cover_img,
                self.opts.preserve_cover_aspect_ratio,
                *page_size(self.opts))
        self.lists_manager.finalize(all_blocks)
        self.styles_manager.finalize(all_blocks)
        self.write()

Пример #3

Показать файл

Файл: from_html.py Проект: pombreda/calibre

    def __call__(self):
        """Run the conversion.

        Runs the SVGRasterizer transform, creates the style, image and font
        managers and the block container, processes every spine item,
        finalizes the styles over all blocks and calls ``write()``.
        """
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        self.svg_rasterizer = SVGRasterizer()
        self.svg_rasterizer(self.oeb, self.opts)

        docx = self.docx
        self.styles_manager = StylesManager(docx.namespace)
        self.images_manager = ImagesManager(
            self.oeb, docx.document_relationships)
        self.fonts_manager = FontsManager(docx.namespace, self.oeb, self.opts)
        self.blocks = Blocks(docx.namespace, self.styles_manager)

        for spine_item in self.oeb.spine:
            self.process_item(spine_item)

        self.styles_manager.finalize(self.blocks.all_blocks)
        self.write()

Пример #4

Показать файл

Файл: from_html.py Проект: educhenm/calibre

    def __call__(self):
        """Run the whole conversion.

        Runs the SVGRasterizer transform (with ``base_css``), creates the
        style, link, image, list and font managers plus the block container,
        processes every spine item, deletes skipped blocks, marks the first
        block, finalizes lists and styles and calls ``write()``.
        """
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        self.svg_rasterizer = SVGRasterizer(base_css=self.base_css)
        self.svg_rasterizer(self.oeb, self.opts)

        docx = self.docx
        self.styles_manager = StylesManager(docx.namespace)
        self.links_manager = LinksManager(
            docx.namespace, docx.document_relationships)
        self.images_manager = ImagesManager(
            self.oeb, docx.document_relationships)
        self.lists_manager = ListsManager(docx)
        self.fonts_manager = FontsManager(docx.namespace, self.oeb, self.opts)
        self.blocks = Blocks(
            docx.namespace, self.styles_manager, self.links_manager)
        self.current_link = None

        for spine_item in self.oeb.spine:
            self.process_item(spine_item)

        all_blocks = self.blocks.all_blocks
        skipped_positions = []
        for index, current in enumerate(all_blocks):
            if index + 1 >= len(all_blocks):
                # The final block has no successor to resolve against
                break
            current.resolve_skipped(all_blocks[index + 1])
            if current.skipped:
                skipped_positions.append(index)
        # Positions are deleted from the highest to the lowest
        for index in reversed(skipped_positions):
            self.blocks.delete_block_at(index)
        self.blocks.all_blocks[0].is_first_block = True

        self.lists_manager.finalize(all_blocks)
        self.styles_manager.finalize(all_blocks)
        self.write()

Пример #5

Показать файл

Файл: from_html.py Проект: v0re/calibre

    def __call__(self):
        """Run the conversion.

        Runs the SVGRasterizer transform, creates the style, image and font
        managers, processes every spine item, finalizes the styles and calls
        ``write()``. ``self.blocks`` is not created here; it has to exist
        before this method runs.
        """
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        self.svg_rasterizer = SVGRasterizer()
        self.svg_rasterizer(self.oeb, self.opts)

        self.styles_manager = StylesManager()
        self.images_manager = ImagesManager(
            self.oeb, self.docx.document_relationships)
        self.fonts_manager = FontsManager(self.oeb)

        for spine_item in self.oeb.spine:
            self.process_item(spine_item)

        self.styles_manager.finalize(self.blocks)
        self.write()

Пример #6

Показать файл

Файл: from_html.py Проект: aimylios/calibre

    def __call__(self):
        """Run the whole conversion.

        Steps, in order: run the SVGRasterizer transform, create the
        managers and the block container, process every spine item,
        optionally process TOC links and read the cover image, delete
        skipped blocks, apply page breaks and languages, build the cover
        markup, finalize lists and styles, then call ``write()``.
        """
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        self.svg_rasterizer = SVGRasterizer(base_css=self.base_css)
        self.svg_rasterizer(self.oeb, self.opts)

        docx = self.docx
        self.styles_manager = StylesManager(
            docx.namespace, self.log, self.mi.language)
        self.links_manager = LinksManager(
            docx.namespace, docx.document_relationships, self.log)
        self.images_manager = ImagesManager(
            self.oeb, docx.document_relationships, self.opts)
        self.lists_manager = ListsManager(docx)
        self.fonts_manager = FontsManager(docx.namespace, self.oeb, self.opts)
        self.blocks = Blocks(
            docx.namespace, self.styles_manager, self.links_manager)
        self.current_link = self.current_lang = None

        for spine_item in self.oeb.spine:
            self.log.debug('Processing', spine_item.href)
            self.process_item(spine_item)
        if self.add_toc:
            self.links_manager.process_toc_links(self.oeb)

        # The cover is only read when it was requested, the metadata names
        # one, and that id exists in the manifest
        if self.add_cover and self.oeb.metadata.cover:
            cover_id = unicode(self.oeb.metadata.cover[0])
            if cover_id in self.oeb.manifest.ids:
                cover_item = self.oeb.manifest.ids[cover_id]
                self.cover_img = self.images_manager.read_image(
                    cover_item.href)

        all_blocks = self.blocks.all_blocks
        skipped_positions = []
        for index, current in enumerate(all_blocks):
            if index + 1 >= len(all_blocks):
                # The final block has no successor to resolve against
                break
            current.resolve_skipped(all_blocks[index + 1])
            if current.skipped:
                skipped_positions.append(index)
        # Positions are deleted from the highest to the lowest
        for index in reversed(skipped_positions):
            self.blocks.delete_block_at(index)
        self.blocks.all_blocks[0].is_first_block = True
        self.blocks.apply_page_break_after()
        self.blocks.resolve_language()

        if self.cover_img is not None:
            # Replace the image read above with its cover markup
            self.cover_img = self.images_manager.create_cover_markup(
                self.cover_img,
                self.opts.preserve_cover_aspect_ratio,
                *page_size(self.opts))
        self.lists_manager.finalize(all_blocks)
        self.styles_manager.finalize(all_blocks)
        self.write()

Пример #7

Показать файл

Файл: from_html.py Проект: CharlesCai930/calibre

class Convert(object):

    def __init__(self, oeb, docx):
        self.oeb, self.docx = oeb, docx
        self.log, self.opts = docx.log, docx.opts

    def __call__(self):
        """Run the conversion.

        Runs the SVGRasterizer transform, creates the style, image and font
        managers and the block container, processes every spine item,
        finalizes the styles over all blocks and calls ``write()``.
        """
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        self.svg_rasterizer = SVGRasterizer()
        self.svg_rasterizer(self.oeb, self.opts)

        self.styles_manager = StylesManager()
        self.images_manager = ImagesManager(
            self.oeb, self.docx.document_relationships)
        self.fonts_manager = FontsManager(self.oeb, self.opts)
        self.blocks = Blocks(self.styles_manager)

        for spine_item in self.oeb.spine:
            self.process_item(spine_item)

        self.styles_manager.finalize(self.blocks.all_blocks)
        self.write()

    def process_item(self, item):
        """Convert one spine item by processing each of its <body> tags.

        The stylizer cached by the SVG rasterizer is reused when there is
        one; otherwise a new Stylizer is built for the item. The item's
        ``abshref`` is recorded on both this object and the images manager.
        """
        cached = self.svg_rasterizer.stylizer_cache.get(item)
        if cached is not None:
            stylizer = cached
        else:
            stylizer = Stylizer(
                item.data, item.href, self.oeb, self.opts,
                self.opts.output_profile)

        abshref = item.abshref
        self.abshref = abshref
        self.images_manager.abshref = abshref

        bodies = XPath('//h:body')(item.data)
        for position, body in enumerate(bodies):
            # Only the first body found is flagged as the first tag
            with self.blocks:
                self.process_tag(
                    body, stylizer, is_first_tag=(position == 0))

    def process_tag(self, html_tag, stylizer, is_first_tag=False):
        """Convert *html_tag*, its element children and its tail text.

        The tag is dispatched on its computed ``display`` value to either
        ``add_inline_tag`` or ``add_block_tag`` (or to the table helpers of
        ``self.blocks``), then every element child is processed recursively,
        and finally the tag's tail text is added to a block of the parent.

        :param html_tag: the element to convert
        :param stylizer: object whose ``style(tag)`` gives the computed style
        :param is_first_tag: when True the tail of *html_tag* is not emitted
        """
        tagname = barename(html_tag.tag)
        # These tags are dropped completely: neither content nor tail is emitted
        if tagname in {'script', 'style', 'title', 'meta'}:
            return
        tag_style = stylizer.style(html_tag)
        # Hidden tags are dropped completely as well
        if tag_style.is_hidden:
            return
        display = tag_style._get('display')
        if display in {'inline', 'inline-block'} or tagname == 'br':  # <br> has display:block but we dont want to start a new paragraph
            self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
        elif display == 'list-item':
            # TODO: Implement this
            self.add_block_tag(tagname, html_tag, tag_style, stylizer)
        elif display.startswith('table') or display == 'inline-table':
            # Table related display values other than the three handled
            # below start nothing here; their children are still processed
            if display == 'table-cell':
                self.blocks.start_new_cell(html_tag, tag_style)
                self.add_block_tag(tagname, html_tag, tag_style, stylizer, is_table_cell=True)
            elif display == 'table-row':
                self.blocks.start_new_row(html_tag, tag_style)
            elif display in {'table', 'inline-table'}:
                self.blocks.start_new_table(html_tag, tag_style)
        else:
            if tagname == 'img' and tag_style['float'] in {'left', 'right'}:
                # Image is floating so dont start a new paragraph for it
                self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
            else:
                self.add_block_tag(tagname, html_tag, tag_style, stylizer)

        # Recurse into element children; is_first_tag is left at its default
        for child in html_tag.iterchildren('*'):
            self.process_tag(child, stylizer)

        # The membership test must happen before finish_tag() is called
        is_block = html_tag in self.blocks.open_html_blocks
        self.blocks.finish_tag(html_tag)
        if is_block and tag_style['page-break-after'] == 'avoid':
            self.blocks.all_blocks[-1].keep_next = True

        if display == 'table-row':
            return  # We ignore the tail for these tags

        if not is_first_tag and html_tag.tail and (not is_block or not html_tag.tail.isspace()):
            # Ignore trailing space after a block tag, as otherwise it will
            # become a new empty paragraph
            block = self.blocks.current_or_new_block(html_tag.getparent(), stylizer.style(html_tag.getparent()))
            block.add_text(html_tag.tail, stylizer.style(html_tag.getparent()), is_parent_style=True)

    def add_block_tag(self, tagname, html_tag, tag_style, stylizer, is_table_cell=False):
        """Start a new block for *html_tag* and put the tag's content in it.

        An ``img`` tag is handed to the images manager; for any other tag
        the tag's leading text, if any, is added to the new block.
        """
        new_block = self.blocks.start_new_block(
            html_tag, tag_style, is_table_cell=is_table_cell)
        if tagname == 'img':
            self.images_manager.add_image(html_tag, new_block, stylizer)
            return
        leading_text = html_tag.text
        if leading_text:
            new_block.add_text(
                leading_text, tag_style,
                ignore_leading_whitespace=True, is_parent_style=True)

    def add_inline_tag(self, tagname, html_tag, tag_style, stylizer):
        """Add the content of an inline tag to the parent's current block.

        ``br`` adds a break unless it has no tail and is the last element
        child of its parent; ``img`` adds an image; any other tag adds its
        leading text when it has some.
        """
        def block_for_parent():
            # Current (or new) block belonging to the parent of html_tag
            parent = html_tag.getparent()
            return self.blocks.current_or_new_block(
                parent, stylizer.style(parent))

        if tagname == 'br':
            if not html_tag.tail:
                siblings = tuple(html_tag.getparent().iterchildren('*'))
                if html_tag is siblings[-1]:
                    return
            target = block_for_parent()
            clear_values = {'both':'all', 'left':'left', 'right':'right'}
            target.add_break(
                clear=clear_values.get(tag_style['clear'], 'none'))
            return

        if tagname == 'img':
            target = block_for_parent()
            self.images_manager.add_image(html_tag, target, stylizer)
            return

        leading_text = html_tag.text
        if leading_text:
            target = block_for_parent()
            target.add_text(leading_text, tag_style, is_parent_style=False)

    def write(self):
        """Serialize blocks, styles, images and fonts into the DOCX container.

        A document skeleton is created from the options; its document and
        styles parts are stored on ``self.docx`` and the blocks are
        serialized into its body.
        """
        document, styles, body = create_skeleton(self.opts)
        self.docx.document = document
        self.docx.styles = styles

        self.blocks.serialize(body)
        first_child = body[0]
        body.append(first_child)  # Move <sectPr> to the end

        self.styles_manager.serialize(self.docx.styles)
        self.images_manager.serialize(self.docx.images)
        self.fonts_manager.serialize(
            self.styles_manager.text_styles,
            self.docx.font_table,
            self.docx.embedded_fonts,
            self.docx.fonts)

Пример #8

Показать файл

Файл: from_html.py Проект: pombreda/calibre

class Convert(object):

    def __init__(self, oeb, docx):
        self.oeb, self.docx = oeb, docx
        self.log, self.opts = docx.log, docx.opts

    def __call__(self):
        """Run the conversion.

        Runs the SVGRasterizer transform, creates the style, image and font
        managers and the block container, processes every spine item,
        finalizes the styles over all blocks and calls ``write()``.
        """
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        self.svg_rasterizer = SVGRasterizer()
        self.svg_rasterizer(self.oeb, self.opts)

        docx = self.docx
        self.styles_manager = StylesManager(docx.namespace)
        self.images_manager = ImagesManager(
            self.oeb, docx.document_relationships)
        self.fonts_manager = FontsManager(docx.namespace, self.oeb, self.opts)
        self.blocks = Blocks(docx.namespace, self.styles_manager)

        for spine_item in self.oeb.spine:
            self.process_item(spine_item)

        self.styles_manager.finalize(self.blocks.all_blocks)
        self.write()

    def process_item(self, item):
        """Convert one spine item by processing each of its <body> tags.

        The stylizer cached by the SVG rasterizer is reused when there is
        one; otherwise a new Stylizer is built for the item. The item's
        ``abshref`` is recorded on both this object and the images manager.
        """
        cached = self.svg_rasterizer.stylizer_cache.get(item)
        if cached is not None:
            stylizer = cached
        else:
            stylizer = Stylizer(
                item.data, item.href, self.oeb, self.opts,
                self.opts.output_profile)

        abshref = item.abshref
        self.abshref = abshref
        self.images_manager.abshref = abshref

        bodies = XPath('//h:body')(item.data)
        for position, body in enumerate(bodies):
            # Only the first body found is flagged as the first tag
            with self.blocks:
                self.process_tag(
                    body, stylizer, is_first_tag=(position == 0))

    def process_tag(self, html_tag, stylizer, is_first_tag=False):
        """Convert *html_tag*, its element children and its tail text.

        The tag is dispatched on its computed ``display`` value to either
        ``add_inline_tag`` or ``add_block_tag`` (or to the table helpers of
        ``self.blocks``), then every element child is processed recursively,
        and finally the tag's tail text is added to a block of the parent.

        :param html_tag: the element to convert
        :param stylizer: object whose ``style(tag)`` gives the computed style
        :param is_first_tag: when True the tail of *html_tag* is not emitted
        """
        tagname = barename(html_tag.tag)
        # These tags are dropped completely: neither content nor tail is emitted
        if tagname in {'script', 'style', 'title', 'meta'}:
            return
        tag_style = stylizer.style(html_tag)
        # Hidden tags are dropped completely as well
        if tag_style.is_hidden:
            return
        display = tag_style._get('display')
        if display in {'inline', 'inline-block'} or tagname == 'br':  # <br> has display:block but we dont want to start a new paragraph
            self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
        elif display == 'list-item':
            # TODO: Implement this
            self.add_block_tag(tagname, html_tag, tag_style, stylizer)
        elif display.startswith('table') or display == 'inline-table':
            # Table related display values other than the three handled
            # below start nothing here; their children are still processed
            if display == 'table-cell':
                self.blocks.start_new_cell(html_tag, tag_style)
                self.add_block_tag(tagname, html_tag, tag_style, stylizer, is_table_cell=True)
            elif display == 'table-row':
                self.blocks.start_new_row(html_tag, tag_style)
            elif display in {'table', 'inline-table'}:
                # The current block is ended before the new table is started
                self.blocks.end_current_block()
                self.blocks.start_new_table(html_tag, tag_style)
        else:
            if tagname == 'img' and tag_style['float'] in {'left', 'right'}:
                # Image is floating so dont start a new paragraph for it
                self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
            else:
                self.add_block_tag(tagname, html_tag, tag_style, stylizer)

        # Recurse into element children; is_first_tag is left at its default
        for child in html_tag.iterchildren('*'):
            self.process_tag(child, stylizer)

        # The membership test must happen before finish_tag() is called
        is_block = html_tag in self.blocks.open_html_blocks
        self.blocks.finish_tag(html_tag)
        if is_block and tag_style['page-break-after'] == 'avoid':
            self.blocks.all_blocks[-1].keep_next = True

        if display == 'table-row':
            return  # We ignore the tail for these tags

        # Whitespace-only tails are dropped after block tags and after any
        # tag whose display value starts with 'table'
        ignore_whitespace_tail = is_block or display.startswith('table')
        if not is_first_tag and html_tag.tail and (not ignore_whitespace_tail or not html_tag.tail.isspace()):
            # Ignore trailing space after a block tag, as otherwise it will
            # become a new empty paragraph
            block = self.blocks.current_or_new_block(html_tag.getparent(), stylizer.style(html_tag.getparent()))
            block.add_text(html_tag.tail, stylizer.style(html_tag.getparent()), is_parent_style=True)

    def add_block_tag(self, tagname, html_tag, tag_style, stylizer, is_table_cell=False):
        """Start a new block for *html_tag* and put the tag's content in it.

        An ``img`` tag is handed to the images manager; for any other tag
        the tag's leading text, if any, is added to the new block.
        """
        new_block = self.blocks.start_new_block(
            html_tag, tag_style, is_table_cell=is_table_cell)
        if tagname == 'img':
            self.images_manager.add_image(html_tag, new_block, stylizer)
            return
        leading_text = html_tag.text
        if leading_text:
            new_block.add_text(
                leading_text, tag_style,
                ignore_leading_whitespace=True, is_parent_style=True)

    def add_inline_tag(self, tagname, html_tag, tag_style, stylizer):
        """Add the content of an inline tag to the parent's current block.

        ``br`` adds a break unless it has no tail and is the last element
        child of its parent; ``img`` adds an image; any other tag adds its
        leading text when it has some.
        """
        def block_for_parent():
            # Current (or new) block belonging to the parent of html_tag
            parent = html_tag.getparent()
            return self.blocks.current_or_new_block(
                parent, stylizer.style(parent))

        if tagname == 'br':
            if not html_tag.tail:
                siblings = tuple(html_tag.getparent().iterchildren('*'))
                if html_tag is siblings[-1]:
                    return
            target = block_for_parent()
            clear_values = {'both':'all', 'left':'left', 'right':'right'}
            target.add_break(
                clear=clear_values.get(tag_style['clear'], 'none'))
            return

        if tagname == 'img':
            target = block_for_parent()
            self.images_manager.add_image(html_tag, target, stylizer)
            return

        leading_text = html_tag.text
        if leading_text:
            target = block_for_parent()
            target.add_text(leading_text, tag_style, is_parent_style=False)

    def write(self):
        """Serialize blocks, styles, images and fonts into the DOCX container.

        A document skeleton is created from the options; its document and
        styles parts are stored on ``self.docx`` and the blocks are
        serialized into its body.
        """
        document, styles, body = create_skeleton(self.opts)
        self.docx.document = document
        self.docx.styles = styles

        self.blocks.serialize(body)
        first_child = body[0]
        body.append(first_child)  # Move <sectPr> to the end

        self.styles_manager.serialize(self.docx.styles)
        self.images_manager.serialize(self.docx.images)
        self.fonts_manager.serialize(
            self.styles_manager.text_styles,
            self.docx.font_table,
            self.docx.embedded_fonts,
            self.docx.fonts)

Пример #9

Показать файл

Файл: from_html.py Проект: smdx023/calibre

class Convert:

    # Word does not apply default styling to hyperlinks, so we ensure they get
    # default styling (the conversion pipeline does not apply any styling to
    # them).
    base_css = '''
    a[href] { text-decoration: underline; color: blue }
    '''

    def __init__(self, oeb, docx, mi, add_cover, add_toc):
        """Store the conversion inputs and size the output profile.

        The logger and the options are taken from *docx*. The options'
        output profile gets ``width_pts`` and ``height_pts`` set from
        ``page_effective_area(self.opts)``.
        """
        self.oeb = oeb
        self.docx = docx
        self.add_cover = add_cover
        self.add_toc = add_toc
        self.log = docx.log
        self.opts = docx.opts
        self.mi = mi
        self.cover_img = None

        profile = self.opts.output_profile
        width_pts, height_pts = page_effective_area(self.opts)
        profile.width_pts = width_pts
        profile.height_pts = height_pts

    def __call__(self):
        """Run the whole conversion.

        Steps, in order: run the SVGRasterizer transform, create the
        managers and the block container, process every spine item,
        optionally process TOC links and read the cover image, delete
        skipped blocks, apply page breaks and languages, build the cover
        markup, finalize lists and styles, then call ``write()``.
        """
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        self.svg_rasterizer = SVGRasterizer(base_css=self.base_css)
        self.svg_rasterizer(self.oeb, self.opts)

        docx = self.docx
        self.styles_manager = StylesManager(
            docx.namespace, self.log, self.mi.language)
        self.links_manager = LinksManager(
            docx.namespace, docx.document_relationships, self.log)
        self.images_manager = ImagesManager(
            self.oeb, docx.document_relationships, self.opts)
        self.lists_manager = ListsManager(docx)
        self.fonts_manager = FontsManager(docx.namespace, self.oeb, self.opts)
        self.blocks = Blocks(
            docx.namespace, self.styles_manager, self.links_manager)
        self.current_link = self.current_lang = None

        for spine_item in self.oeb.spine:
            self.log.debug('Processing', spine_item.href)
            self.process_item(spine_item)
        if self.add_toc:
            self.links_manager.process_toc_links(self.oeb)

        # The cover is only read when it was requested, the metadata names
        # one, and that id exists in the manifest
        if self.add_cover and self.oeb.metadata.cover:
            cover_id = str(self.oeb.metadata.cover[0])
            if cover_id in self.oeb.manifest.ids:
                cover_item = self.oeb.manifest.ids[cover_id]
                self.cover_img = self.images_manager.read_image(
                    cover_item.href)

        all_blocks = self.blocks.all_blocks
        skipped_positions = []
        for index, current in enumerate(all_blocks):
            if index + 1 >= len(all_blocks):
                # The final block has no successor to resolve against
                break
            current.resolve_skipped(all_blocks[index + 1])
            if current.skipped:
                skipped_positions.append(index)
        # Positions are deleted from the highest to the lowest
        for index in reversed(skipped_positions):
            self.blocks.delete_block_at(index)
        self.blocks.all_blocks[0].is_first_block = True
        self.blocks.apply_page_break_after()
        self.blocks.resolve_language()

        if self.cover_img is not None:
            # Replace the image read above with its cover markup
            self.cover_img = self.images_manager.create_cover_markup(
                self.cover_img,
                self.opts.preserve_cover_aspect_ratio,
                *page_size(self.opts))
        self.lists_manager.finalize(all_blocks)
        self.styles_manager.finalize(all_blocks)
        self.write()

    def process_item(self, item):
        """Convert one spine item by processing each of its <body> tags.

        Records the item as ``self.current_item``, reuses the stylizer
        cached by the SVG rasterizer (or builds a new one with
        ``base_css``), records the item's ``abshref`` and sets the current
        language from the item's data, falling back to the document
        language of the styles manager.
        """
        self.current_item = item

        cached = self.svg_rasterizer.stylizer_cache.get(item)
        if cached is not None:
            stylizer = cached
        else:
            stylizer = Stylizer(
                item.data, item.href, self.oeb, self.opts,
                profile=self.opts.output_profile,
                base_css=self.base_css)

        abshref = item.abshref
        self.abshref = abshref
        self.images_manager.abshref = abshref

        item_lang = lang_for_tag(item.data)
        self.current_lang = item_lang or self.styles_manager.document_lang

        bodies = XPath('//h:body')(item.data)
        for position, body in enumerate(bodies):
            with self.blocks:
                links = self.links_manager
                # Bookmark for the links manager's top anchor on this body
                self.blocks.top_bookmark = links.bookmark_for_anchor(
                    links.top_anchor, self.current_item, body)
                self.process_tag(
                    body, stylizer, is_first_tag=(position == 0))

    def process_tag(self,
                    html_tag,
                    stylizer,
                    is_first_tag=False,
                    float_spec=None):
        """Convert *html_tag*, its children and its tail text.

        Unless the tag's contents are ignored (script/style/title/meta or a
        hidden tag), the tag is dispatched on its computed ``display`` value
        to ``add_inline_tag`` or ``add_block_tag`` (or to the table helpers
        of ``self.blocks``) and its children are processed recursively.
        The tail text is handled afterwards, even for ignored tags.

        :param html_tag: the element to convert
        :param stylizer: object whose ``style(tag)`` gives the computed style
        :param is_first_tag: when True the tag is never treated as floating
            and its tail is not emitted
        :param float_spec: float specification passed down from an ancestor,
            or None
        """
        tagname = barename(html_tag.tag)
        tag_style = stylizer.style(html_tag)
        ignore_tag_contents = tagname in {'script', 'style', 'title', 'meta'
                                          } or tag_style.is_hidden
        display = tag_style._get('display')
        is_block = False

        if not ignore_tag_contents:
            # Link and language state are saved here and restored once the
            # tag and its children have been processed
            previous_link = self.current_link
            if tagname == 'a' and html_tag.get('href'):
                self.current_link = (self.current_item, html_tag.get('href'),
                                     html_tag.get('title'))
            previous_lang = self.current_lang
            tag_lang = lang_for_tag(html_tag)
            if tag_lang:
                self.current_lang = tag_lang

            is_float = tag_style['float'] in {'left', 'right'
                                              } and not is_first_tag
            # A float spec is only created when none was passed in
            if float_spec is None and is_float:
                float_spec = FloatSpec(self.docx.namespace, html_tag,
                                       tag_style)

            if display in {
                    'inline', 'inline-block'
            } or tagname == 'br':  # <br> has display:block but we dont want to start a new paragraph
                if is_float and float_spec.is_dropcaps:
                    self.add_block_tag(tagname,
                                       html_tag,
                                       tag_style,
                                       stylizer,
                                       float_spec=float_spec)
                    # The spec has been used, so children do not receive it
                    float_spec = None
                else:
                    self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
            elif display == 'list-item':
                self.add_block_tag(tagname,
                                   html_tag,
                                   tag_style,
                                   stylizer,
                                   is_list_item=True)
            elif display.startswith('table') or display == 'inline-table':
                # Table related display values other than the three handled
                # below start nothing here; their children are still processed
                if display == 'table-cell':
                    self.blocks.start_new_cell(html_tag, tag_style)
                    self.add_block_tag(tagname,
                                       html_tag,
                                       tag_style,
                                       stylizer,
                                       is_table_cell=True)
                elif display == 'table-row':
                    self.blocks.start_new_row(html_tag, tag_style)
                elif display in {'table', 'inline-table'}:
                    # The current block is ended before the table is started
                    self.blocks.end_current_block()
                    self.blocks.start_new_table(html_tag, tag_style)
            else:
                if tagname == 'img' and is_float:
                    # Image is floating so dont start a new paragraph for it
                    self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
                else:
                    if tagname == 'hr':
                        # Only the top border style of an <hr> is left as is
                        for edge in 'right bottom left'.split():
                            tag_style.set('border-%s-style' % edge, 'none')
                    self.add_block_tag(tagname,
                                       html_tag,
                                       tag_style,
                                       stylizer,
                                       float_spec=float_spec)

            for child in html_tag.iterchildren():
                # Children whose tag is a string are elements; they are
                # processed recursively
                if isinstance(getattr(child, 'tag', None), string_or_bytes):
                    self.process_tag(child, stylizer, float_spec=float_spec)
                else:  # Comment/PI/etc.
                    # Only the tail text of such a node is kept
                    tail = getattr(child, 'tail', None)
                    if tail:
                        block = self.create_block_from_parent(
                            html_tag, stylizer)
                        block.add_text(tail,
                                       tag_style,
                                       is_parent_style=False,
                                       link=self.current_link,
                                       lang=self.current_lang)

            # The membership test must happen before finish_tag() is called
            is_block = html_tag in self.blocks.open_html_blocks
            self.blocks.finish_tag(html_tag)
            if is_block and tag_style['page-break-after'] == 'avoid':
                self.blocks.all_blocks[-1].keep_next = True

            self.current_link = previous_link
            self.current_lang = previous_lang

        # Now, process the tail if any

        if display == 'table-row':
            return  # We ignore the tail for these tags

        # Whitespace-only tails are dropped after block tags and after any
        # tag whose display value starts with 'table'
        ignore_whitespace_tail = is_block or display.startswith('table')
        if not is_first_tag and html_tag.tail and (
                not ignore_whitespace_tail or not html_tag.tail.isspace()):
            # Ignore trailing space after a block tag, as otherwise it will
            # become a new empty paragraph
            block = self.create_block_from_parent(html_tag, stylizer)
            block.add_text(html_tag.tail,
                           stylizer.style(html_tag.getparent()),
                           is_parent_style=True,
                           link=self.current_link,
                           lang=self.current_lang)

    def create_block_from_parent(self, html_tag, stylizer):
        """Return the current (or a new) block for the parent of *html_tag*.

        The returned block always has ``page_break_before`` set to False.
        """
        parent_tag = html_tag.getparent()
        parent_style = stylizer.style(parent_tag)
        target = self.blocks.current_or_new_block(parent_tag, parent_style)
        # Do not inherit page-break-before from parent
        target.page_break_before = False
        return target

    def add_block_tag(self, tagname, html_tag, tag_style, stylizer,
                      is_table_cell=False, float_spec=None,
                      is_list_item=False):
        """Start a new block for *html_tag* and put the tag's content in it.

        A bookmark is added to the block when the tag has an ``id`` or
        ``name`` attribute. An ``img`` tag is handed to the images manager
        as a block image; for any other tag the leading text is added. An
        ``li`` whose first element child is a non-empty ``ul``/``ol`` has a
        whitespace-only leading text dropped and, when it ends up without
        text, its block is flagged ``force_not_empty``.
        """
        new_block = self.blocks.start_new_block(
            html_tag,
            tag_style,
            is_table_cell=is_table_cell,
            float_spec=float_spec,
            is_list_item=is_list_item)

        anchor_name = html_tag.get('id') or html_tag.get('name')
        if anchor_name:
            bookmark = self.bookmark_for_anchor(anchor_name, html_tag)
            new_block.bookmarks.add(bookmark)

        if tagname == 'img':
            self.images_manager.add_image(
                html_tag, new_block, stylizer, as_block=True)
            return

        leading_text = html_tag.text
        starts_with_sublist = (
            tagname == 'li'
            and len(html_tag)
            and barename(html_tag[0].tag) in ('ul', 'ol')
            and len(html_tag[0]))
        if leading_text and starts_with_sublist and not leading_text.strip():
            leading_text = ''  # whitespace only, ignore

        if leading_text:
            new_block.add_text(
                leading_text,
                tag_style,
                ignore_leading_whitespace=True,
                is_parent_style=True,
                link=self.current_link,
                lang=self.current_lang)
        elif starts_with_sublist:
            new_block.force_not_empty = True

    def add_inline_tag(self, tagname, html_tag, tag_style, stylizer):
        """Add the content of an inline tag to a block of its parent.

        A bookmark is created when the tag has an ``id`` or ``name``
        attribute. ``br`` adds a break unless it has no tail and is the
        last element child of its parent; ``img`` adds an image; any other
        tag adds its leading text, or an empty text run when it has no text
        but does have a bookmark.
        """
        anchor_name = html_tag.get('id') or html_tag.get('name') or None
        bookmark = None
        if anchor_name:
            bookmark = self.bookmark_for_anchor(anchor_name, html_tag)

        if tagname == 'br':
            if not html_tag.tail:
                siblings = tuple(html_tag.getparent().iterchildren('*'))
                if html_tag is siblings[-1]:
                    return
            target = self.create_block_from_parent(html_tag, stylizer)
            clear_values = {
                'both': 'all',
                'left': 'left',
                'right': 'right'
            }
            target.add_break(
                clear=clear_values.get(tag_style['clear'], 'none'),
                bookmark=bookmark)
            return

        if tagname == 'img':
            target = self.create_block_from_parent(html_tag, stylizer)
            self.images_manager.add_image(
                html_tag, target, stylizer, bookmark=bookmark)
            return

        leading_text = html_tag.text
        if leading_text or bookmark:
            target = self.create_block_from_parent(html_tag, stylizer)
            target.add_text(
                leading_text if leading_text else '',
                tag_style,
                is_parent_style=False,
                bookmark=bookmark,
                link=self.current_link,
                lang=self.current_lang)

    def bookmark_for_anchor(self, anchor, html_tag):
        """Ask the links manager for the bookmark of *anchor* on *html_tag*
        within the item currently being processed."""
        make_bookmark = self.links_manager.bookmark_for_anchor
        return make_bookmark(anchor, self.current_item, html_tag)

    def write(self):
        """Serialize everything that was collected into the DOCX container.

        A document skeleton is created from the options and the blocks are
        serialized into its body. The TOC is serialized only when the links
        manager has one, and the cover block is written only when a cover
        image exists. Styles, images, fonts and list numbering follow.
        """
        document, styles, body = create_skeleton(self.opts)
        self.docx.document = document
        self.docx.styles = styles

        self.blocks.serialize(body)
        first_child = body[0]
        body.append(first_child)  # Move <sectPr> to the end

        if self.links_manager.toc:
            self.links_manager.serialize_toc(
                body, self.styles_manager.primary_heading_style)
        if self.cover_img is not None:
            self.images_manager.write_cover_block(body, self.cover_img)

        self.styles_manager.serialize(self.docx.styles)
        self.images_manager.serialize(self.docx.images)
        self.fonts_manager.serialize(
            self.styles_manager.text_styles,
            self.docx.font_table,
            self.docx.embedded_fonts,
            self.docx.fonts)
        self.lists_manager.serialize(self.docx.numbering)

Пример #10

Показать файл

Файл: from_html.py Проект: educhenm/calibre

class Convert(object):
    """Convert the spine of an OEB book into the parts of a DOCX container.

    Calling an instance runs the SVG rasterizer transform, creates the
    style/link/image/list/font managers, turns every spine item into blocks
    and finally serializes everything onto the ``docx`` object given to the
    constructor.
    """

    # Word does not apply default styling to hyperlinks, so we ensure they get
    # default styling (the conversion pipeline does not apply any styling to
    # them).
    base_css = '''
    a[href] { text-decoration: underline; color: blue }
    '''

    def __init__(self, oeb, docx):
        """Remember the input book and output container.

        :param oeb: the book to convert (its ``spine`` is iterated)
        :param docx: output container; also supplies ``log`` and ``opts``
        """
        self.oeb, self.docx = oeb, docx
        self.log, self.opts = docx.log, docx.opts

    def __call__(self):
        """Run the whole conversion and write the result into ``self.docx``."""
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        self.svg_rasterizer = SVGRasterizer(base_css=self.base_css)
        self.svg_rasterizer(self.oeb, self.opts)

        self.styles_manager = StylesManager(self.docx.namespace)
        self.links_manager = LinksManager(self.docx.namespace, self.docx.document_relationships)
        self.images_manager = ImagesManager(self.oeb, self.docx.document_relationships)
        self.lists_manager = ListsManager(self.docx)
        self.fonts_manager = FontsManager(self.docx.namespace, self.oeb, self.opts)
        self.blocks = Blocks(self.docx.namespace, self.styles_manager, self.links_manager)
        self.current_link = None

        for item in self.oeb.spine:
            self.process_item(item)

        # The same list object is handed to the finalizers below, exactly as
        # it was captured before skipped blocks are deleted.
        all_blocks = self.blocks.all_blocks
        self._prune_skipped_blocks(all_blocks)

        self.lists_manager.finalize(all_blocks)
        self.styles_manager.finalize(all_blocks)
        self.write()

    def _prune_skipped_blocks(self, all_blocks):
        """Delete blocks that resolve as skipped and flag the first survivor.

        Every block except the last one is asked to resolve its skipped state
        against its successor. Blocks reporting ``skipped`` are deleted
        back-to-front so earlier positions stay valid. Does nothing more when
        no blocks are left, instead of raising IndexError.
        """
        remove_positions = []
        for i, block in enumerate(all_blocks):
            try:
                nb = all_blocks[i+1]
            except IndexError:
                break  # the last block has no successor to resolve against
            block.resolve_skipped(nb)
            if block.skipped:
                remove_positions.append(i)
        for pos in reversed(remove_positions):
            self.blocks.delete_block_at(pos)
        remaining = self.blocks.all_blocks
        if remaining:
            remaining[0].is_first_block = True

    def process_item(self, item):
        """Convert one spine item by processing each ``<body>`` found in it."""
        self.current_item = item
        # Reuse the stylizer the rasterizer cached for this item, if any
        stylizer = self.svg_rasterizer.stylizer_cache.get(item)
        if stylizer is None:
            stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, self.opts.output_profile, base_css=self.base_css)
        self.abshref = self.images_manager.abshref = item.abshref

        for i, body in enumerate(XPath('//h:body')(item.data)):
            with self.blocks:
                # Make sure the body carries an id, falling back to the links
                # manager's top anchor
                body.set('id', body.get('id', None) or self.links_manager.top_anchor)
                self.process_tag(body, stylizer, is_first_tag=i == 0)

    def process_tag(self, html_tag, stylizer, is_first_tag=False, float_spec=None):
        """Convert *html_tag*, its element children and its tail text.

        The tag is routed to :meth:`add_inline_tag` or :meth:`add_block_tag`
        (or to the table methods of ``self.blocks``) based on its CSS
        ``display`` value. Children are then processed recursively.

        :param html_tag: element to convert
        :param stylizer: object providing ``style(tag)`` for computed styles
        :param is_first_tag: True for the first ``<body>`` of an item; such a
            tag is never treated as floating and its tail is ignored
        :param float_spec: float specification inherited from an ancestor
        """
        tagname = barename(html_tag.tag)
        if tagname in {'script', 'style', 'title', 'meta'}:
            return
        tag_style = stylizer.style(html_tag)
        if tag_style.is_hidden:
            return

        # Links apply to everything nested inside the <a>; restored below
        previous_link = self.current_link
        if tagname == 'a' and html_tag.get('href'):
            self.current_link = (self.current_item, html_tag.get('href'), html_tag.get('title'))

        display = tag_style._get('display')
        is_float = tag_style['float'] in {'left', 'right'} and not is_first_tag
        if float_spec is None and is_float:
            float_spec = FloatSpec(self.docx.namespace, html_tag, tag_style)

        if display in {'inline', 'inline-block'} or tagname == 'br':  # <br> has display:block but we dont want to start a new paragraph
            if is_float and float_spec.is_dropcaps:
                self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)
                float_spec = None
            else:
                self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
        elif display == 'list-item':
            self.add_block_tag(tagname, html_tag, tag_style, stylizer, is_list_item=True)
        elif display.startswith('table') or display == 'inline-table':
            if display == 'table-cell':
                self.blocks.start_new_cell(html_tag, tag_style)
                self.add_block_tag(tagname, html_tag, tag_style, stylizer, is_table_cell=True)
            elif display == 'table-row':
                self.blocks.start_new_row(html_tag, tag_style)
            elif display in {'table', 'inline-table'}:
                self.blocks.end_current_block()
                self.blocks.start_new_table(html_tag, tag_style)
        else:
            if tagname == 'img' and is_float:
                # Image is floating so dont start a new paragraph for it
                self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
            else:
                self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)

        for child in html_tag.iterchildren('*'):
            self.process_tag(child, stylizer, float_spec=float_spec)

        # Must be read before finish_tag(), which is told the tag is done
        is_block = html_tag in self.blocks.open_html_blocks
        self.blocks.finish_tag(html_tag)
        if is_block and tag_style['page-break-after'] == 'avoid':
            self.blocks.all_blocks[-1].keep_next = True

        self.current_link = previous_link

        if display == 'table-row':
            return  # We ignore the tail for these tags

        ignore_whitespace_tail = is_block or display.startswith('table')
        if not is_first_tag and html_tag.tail and (not ignore_whitespace_tail or not html_tag.tail.isspace()):
            # Ignore trailing space after a block tag, as otherwise it will
            # become a new empty paragraph
            block = self.blocks.current_or_new_block(html_tag.getparent(), stylizer.style(html_tag.getparent()))
            block.add_text(html_tag.tail, stylizer.style(html_tag.getparent()), is_parent_style=True, link=self.current_link)

    def add_block_tag(self, tagname, html_tag, tag_style, stylizer, is_table_cell=False, float_spec=None, is_list_item=False):
        """Start a new block for *html_tag* and add its image or leading text.

        An ``id``/``name`` attribute on the tag becomes a bookmark on the new
        block.
        """
        block = self.blocks.start_new_block(html_tag, tag_style, is_table_cell=is_table_cell, float_spec=float_spec, is_list_item=is_list_item)
        anchor = html_tag.get('id') or html_tag.get('name')
        if anchor:
            block.bookmarks.add(self.bookmark_for_anchor(anchor, html_tag))
        if tagname == 'img':
            self.images_manager.add_image(html_tag, block, stylizer, as_block=True)
        else:
            if html_tag.text:
                block.add_text(html_tag.text, tag_style, ignore_leading_whitespace=True, is_parent_style=True, link=self.current_link)

    def add_inline_tag(self, tagname, html_tag, tag_style, stylizer):
        """Add a break, image or text run for *html_tag* to the parent's block.

        The block is obtained from ``self.blocks.current_or_new_block`` for
        the tag's parent. An ``id``/``name`` attribute on the tag is passed
        along as a bookmark.
        """
        anchor = html_tag.get('id') or html_tag.get('name') or None
        bmark = None
        if anchor:
            bmark = self.bookmark_for_anchor(anchor, html_tag)
        if tagname == 'br':
            # A <br> that is the last element child and has no tail adds nothing
            if html_tag.tail or html_tag is not tuple(html_tag.getparent().iterchildren('*'))[-1]:
                block = self.blocks.current_or_new_block(html_tag.getparent(), stylizer.style(html_tag.getparent()))
                block.add_break(clear={'both':'all', 'left':'left', 'right':'right'}.get(tag_style['clear'], 'none'), bookmark=bmark)
        elif tagname == 'img':
            block = self.blocks.current_or_new_block(html_tag.getparent(), stylizer.style(html_tag.getparent()))
            self.images_manager.add_image(html_tag, block, stylizer, bookmark=bmark)
        else:
            # NOTE(review): when the tag has an anchor but no text, bmark is
            # never attached to any run here -- confirm whether links to such
            # empty anchors are expected to resolve.
            if html_tag.text:
                block = self.blocks.current_or_new_block(html_tag.getparent(), stylizer.style(html_tag.getparent()))
                block.add_text(html_tag.text, tag_style, is_parent_style=False, bookmark=bmark, link=self.current_link)

    def bookmark_for_anchor(self, anchor, html_tag):
        """Return the links manager's bookmark for *anchor* in the current item."""
        return self.links_manager.bookmark_for_anchor(anchor, self.current_item, html_tag)

    def write(self):
        """Create the document skeleton and serialize all collected data."""
        self.docx.document, self.docx.styles, body = create_skeleton(self.opts)
        self.blocks.serialize(body)
        body.append(body[0])  # Move <sectPr> to the end
        self.styles_manager.serialize(self.docx.styles)
        self.images_manager.serialize(self.docx.images)
        self.fonts_manager.serialize(self.styles_manager.text_styles, self.docx.font_table, self.docx.embedded_fonts, self.docx.fonts)
        self.lists_manager.serialize(self.docx.numbering)

Пример #11

Показать файл

Файл: from_html.py Проект: aimylios/calibre

class Convert(object):
    """Convert the spine of an OEB book into the parts of a DOCX container.

    Calling an instance runs the SVG rasterizer transform, creates the
    style/link/image/list/font managers, turns every spine item into blocks,
    optionally prepares a table of contents and a cover, and finally
    serializes everything onto the ``docx`` object given to the constructor.
    """

    # Word does not apply default styling to hyperlinks, so we ensure they get
    # default styling (the conversion pipeline does not apply any styling to
    # them).
    base_css = '''
    a[href] { text-decoration: underline; color: blue }
    '''

    def __init__(self, oeb, docx, mi, add_cover, add_toc):
        """Remember inputs and store the effective page area on the profile.

        :param oeb: the book to convert (its ``spine`` is iterated)
        :param docx: output container; also supplies ``log`` and ``opts``
        :param mi: metadata object; its ``language`` is given to the styles
            manager
        :param add_cover: whether to read the book's cover image
        :param add_toc: whether to process table-of-contents links
        """
        self.oeb, self.docx, self.add_cover, self.add_toc = oeb, docx, add_cover, add_toc
        self.log, self.opts = docx.log, docx.opts
        self.mi = mi
        self.cover_img = None
        p = self.opts.output_profile
        p.width_pts, p.height_pts = page_effective_area(self.opts)

    def __call__(self):
        """Run the whole conversion and write the result into ``self.docx``."""
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        self.svg_rasterizer = SVGRasterizer(base_css=self.base_css)
        self.svg_rasterizer(self.oeb, self.opts)

        self.styles_manager = StylesManager(self.docx.namespace, self.log, self.mi.language)
        self.links_manager = LinksManager(self.docx.namespace, self.docx.document_relationships, self.log)
        self.images_manager = ImagesManager(self.oeb, self.docx.document_relationships, self.opts)
        self.lists_manager = ListsManager(self.docx)
        self.fonts_manager = FontsManager(self.docx.namespace, self.oeb, self.opts)
        self.blocks = Blocks(self.docx.namespace, self.styles_manager, self.links_manager)
        self.current_link = self.current_lang = None

        for item in self.oeb.spine:
            self.log.debug('Processing', item.href)
            self.process_item(item)
        if self.add_toc:
            self.links_manager.process_toc_links(self.oeb)

        if self.add_cover and self.oeb.metadata.cover:
            # Convert the cover reference to text once and reuse it
            cover_id = unicode(self.oeb.metadata.cover[0])
            if cover_id in self.oeb.manifest.ids:
                item = self.oeb.manifest.ids[cover_id]
                self.cover_img = self.images_manager.read_image(item.href)

        # The same list object is handed to the finalizers below, exactly as
        # it was captured before skipped blocks are deleted.
        all_blocks = self.blocks.all_blocks
        self._prune_skipped_blocks(all_blocks)
        self.blocks.apply_page_break_after()
        self.blocks.resolve_language()

        if self.cover_img is not None:
            self.cover_img = self.images_manager.create_cover_markup(self.cover_img, self.opts.preserve_cover_aspect_ratio, *page_size(self.opts))
        self.lists_manager.finalize(all_blocks)
        self.styles_manager.finalize(all_blocks)
        self.write()

    def _prune_skipped_blocks(self, all_blocks):
        """Delete blocks that resolve as skipped and flag the first survivor.

        Every block except the last one is asked to resolve its skipped state
        against its successor. Blocks reporting ``skipped`` are deleted
        back-to-front so earlier positions stay valid. Does nothing more when
        no blocks are left, instead of raising IndexError.
        """
        remove_positions = []
        for i, block in enumerate(all_blocks):
            try:
                nb = all_blocks[i+1]
            except IndexError:
                break  # the last block has no successor to resolve against
            block.resolve_skipped(nb)
            if block.skipped:
                remove_positions.append(i)
        for pos in reversed(remove_positions):
            self.blocks.delete_block_at(pos)
        remaining = self.blocks.all_blocks
        if remaining:
            remaining[0].is_first_block = True

    def process_item(self, item):
        """Convert one spine item by processing each ``<body>`` found in it."""
        self.current_item = item
        # Reuse the stylizer the rasterizer cached for this item, if any
        stylizer = self.svg_rasterizer.stylizer_cache.get(item)
        if stylizer is None:
            stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, profile=self.opts.output_profile, base_css=self.base_css)
        self.abshref = self.images_manager.abshref = item.abshref

        # Language declared on the item, else the document-wide language
        self.current_lang = lang_for_tag(item.data) or self.styles_manager.document_lang
        for i, body in enumerate(XPath('//h:body')(item.data)):
            with self.blocks:
                self.blocks.top_bookmark = self.links_manager.bookmark_for_anchor(self.links_manager.top_anchor, self.current_item, body)
                self.process_tag(body, stylizer, is_first_tag=i == 0)

    def process_tag(self, html_tag, stylizer, is_first_tag=False, float_spec=None):
        """Convert *html_tag*, its children and its tail text.

        Unless the tag's contents are ignored (script/style/title/meta or a
        hidden style), the tag is routed to :meth:`add_inline_tag` or
        :meth:`add_block_tag` (or to the table methods of ``self.blocks``)
        based on its CSS ``display`` value and its children are processed
        recursively. The tail text is handled even for ignored tags.

        :param html_tag: element to convert
        :param stylizer: object providing ``style(tag)`` for computed styles
        :param is_first_tag: True for the first ``<body>`` of an item; such a
            tag is never treated as floating and its tail is ignored
        :param float_spec: float specification inherited from an ancestor
        """
        tagname = barename(html_tag.tag)
        tag_style = stylizer.style(html_tag)
        ignore_tag_contents = tagname in {'script', 'style', 'title', 'meta'} or tag_style.is_hidden
        display = tag_style._get('display')
        is_block = False

        if not ignore_tag_contents:
            # Link and language apply to everything nested inside the tag;
            # both are restored after the children have been processed
            previous_link = self.current_link
            if tagname == 'a' and html_tag.get('href'):
                self.current_link = (self.current_item, html_tag.get('href'), html_tag.get('title'))
            previous_lang = self.current_lang
            tag_lang = lang_for_tag(html_tag)
            if tag_lang:
                self.current_lang = tag_lang

            is_float = tag_style['float'] in {'left', 'right'} and not is_first_tag
            if float_spec is None and is_float:
                float_spec = FloatSpec(self.docx.namespace, html_tag, tag_style)

            if display in {'inline', 'inline-block'} or tagname == 'br':  # <br> has display:block but we dont want to start a new paragraph
                if is_float and float_spec.is_dropcaps:
                    self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)
                    float_spec = None
                else:
                    self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
            elif display == 'list-item':
                self.add_block_tag(tagname, html_tag, tag_style, stylizer, is_list_item=True)
            elif display.startswith('table') or display == 'inline-table':
                if display == 'table-cell':
                    self.blocks.start_new_cell(html_tag, tag_style)
                    self.add_block_tag(tagname, html_tag, tag_style, stylizer, is_table_cell=True)
                elif display == 'table-row':
                    self.blocks.start_new_row(html_tag, tag_style)
                elif display in {'table', 'inline-table'}:
                    self.blocks.end_current_block()
                    self.blocks.start_new_table(html_tag, tag_style)
            else:
                if tagname == 'img' and is_float:
                    # Image is floating so dont start a new paragraph for it
                    self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
                else:
                    if tagname == 'hr':
                        # Turn off every border except the top one
                        for edge in 'right bottom left'.split():
                            tag_style.set('border-%s-style' % edge, 'none')
                    self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)

            for child in html_tag.iterchildren():
                if isinstance(getattr(child, 'tag', None), basestring):
                    self.process_tag(child, stylizer, float_spec=float_spec)
                else:  # Comment/PI/etc.
                    # Only the text following the node is kept
                    tail = getattr(child, 'tail', None)
                    if tail:
                        block = self.create_block_from_parent(html_tag, stylizer)
                        block.add_text(tail, tag_style, is_parent_style=False, link=self.current_link, lang=self.current_lang)

            # Must be read before finish_tag(), which is told the tag is done
            is_block = html_tag in self.blocks.open_html_blocks
            self.blocks.finish_tag(html_tag)
            if is_block and tag_style['page-break-after'] == 'avoid':
                self.blocks.all_blocks[-1].keep_next = True

            self.current_link = previous_link
            self.current_lang = previous_lang

        # Now, process the tail if any

        if display == 'table-row':
            return  # We ignore the tail for these tags

        ignore_whitespace_tail = is_block or display.startswith('table')
        if not is_first_tag and html_tag.tail and (not ignore_whitespace_tail or not html_tag.tail.isspace()):
            # Ignore trailing space after a block tag, as otherwise it will
            # become a new empty paragraph
            block = self.create_block_from_parent(html_tag, stylizer)
            block.add_text(html_tag.tail, stylizer.style(html_tag.getparent()), is_parent_style=True, link=self.current_link, lang=self.current_lang)

    def create_block_from_parent(self, html_tag, stylizer):
        """Return the current (or a new) block for the parent of *html_tag*.

        The returned block always has ``page_break_before`` cleared.
        """
        parent = html_tag.getparent()
        block = self.blocks.current_or_new_block(parent, stylizer.style(parent))
        # Do not inherit page-break-before from parent
        block.page_break_before = False
        return block

    def add_block_tag(self, tagname, html_tag, tag_style, stylizer, is_table_cell=False, float_spec=None, is_list_item=False):
        """Start a new block for *html_tag* and add its image or leading text.

        An ``id``/``name`` attribute on the tag becomes a bookmark on the new
        block.
        """
        block = self.blocks.start_new_block(
            html_tag, tag_style, is_table_cell=is_table_cell, float_spec=float_spec, is_list_item=is_list_item)
        anchor = html_tag.get('id') or html_tag.get('name')
        if anchor:
            block.bookmarks.add(self.bookmark_for_anchor(anchor, html_tag))
        if tagname == 'img':
            self.images_manager.add_image(html_tag, block, stylizer, as_block=True)
        else:
            if html_tag.text:
                block.add_text(html_tag.text, tag_style, ignore_leading_whitespace=True, is_parent_style=True, link=self.current_link, lang=self.current_lang)

    def add_inline_tag(self, tagname, html_tag, tag_style, stylizer):
        """Add a break, image or text run for *html_tag* to the parent's block.

        An ``id``/``name`` attribute on the tag is passed along as a bookmark.
        A tag with an anchor but no text still gets an empty text run so the
        bookmark is attached.
        """
        anchor = html_tag.get('id') or html_tag.get('name') or None
        bmark = None
        if anchor:
            bmark = self.bookmark_for_anchor(anchor, html_tag)
        if tagname == 'br':
            # A <br> that is the last element child and has no tail adds nothing
            if html_tag.tail or html_tag is not tuple(html_tag.getparent().iterchildren('*'))[-1]:
                block = self.create_block_from_parent(html_tag, stylizer)
                block.add_break(clear={'both':'all', 'left':'left', 'right':'right'}.get(tag_style['clear'], 'none'), bookmark=bmark)
        elif tagname == 'img':
            block = self.create_block_from_parent(html_tag, stylizer)
            self.images_manager.add_image(html_tag, block, stylizer, bookmark=bmark)
        else:
            if html_tag.text:
                block = self.create_block_from_parent(html_tag, stylizer)
                block.add_text(html_tag.text, tag_style, is_parent_style=False, bookmark=bmark, link=self.current_link, lang=self.current_lang)
            elif bmark:
                block = self.create_block_from_parent(html_tag, stylizer)
                block.add_text('', tag_style, is_parent_style=False, bookmark=bmark, link=self.current_link, lang=self.current_lang)

    def bookmark_for_anchor(self, anchor, html_tag):
        """Return the links manager's bookmark for *anchor* in the current item."""
        return self.links_manager.bookmark_for_anchor(anchor, self.current_item, html_tag)

    def write(self):
        """Create the document skeleton and serialize all collected data.

        The table of contents and the cover block are written only when the
        links manager has a TOC and a cover image was prepared, respectively.
        """
        self.docx.document, self.docx.styles, body = create_skeleton(self.opts)
        self.blocks.serialize(body)
        body.append(body[0])  # Move <sectPr> to the end
        if self.links_manager.toc:
            self.links_manager.serialize_toc(body, self.styles_manager.primary_heading_style)
        if self.cover_img is not None:
            self.images_manager.write_cover_block(body, self.cover_img)
        self.styles_manager.serialize(self.docx.styles)
        self.images_manager.serialize(self.docx.images)
        self.fonts_manager.serialize(self.styles_manager.text_styles, self.docx.font_table, self.docx.embedded_fonts, self.docx.fonts)
        self.lists_manager.serialize(self.docx.numbering)

Пример #12

Показать файл

Файл: from_html.py Проект: tomschlenkhoff/calibre

class Convert(object):
    """Convert the spine of an OEB book into the parts of a DOCX container.

    Calling an instance runs the SVG rasterizer transform, creates the
    style/link/image/list/font managers, turns every spine item into blocks
    and finally serializes everything onto the ``docx`` object given to the
    constructor.
    """

    # Word does not apply default styling to hyperlinks, so we ensure they get
    # default styling (the conversion pipeline does not apply any styling to
    # them).
    base_css = '''
    a[href] { text-decoration: underline; color: blue }
    '''

    def __init__(self, oeb, docx):
        """Remember the input book and output container.

        :param oeb: the book to convert (its ``spine`` is iterated)
        :param docx: output container; also supplies ``log`` and ``opts``
        """
        self.oeb, self.docx = oeb, docx
        self.log, self.opts = docx.log, docx.opts

    def __call__(self):
        """Run the whole conversion and write the result into ``self.docx``."""
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        self.svg_rasterizer = SVGRasterizer(base_css=self.base_css)
        self.svg_rasterizer(self.oeb, self.opts)

        self.styles_manager = StylesManager(self.docx.namespace)
        self.links_manager = LinksManager(self.docx.namespace,
                                          self.docx.document_relationships)
        self.images_manager = ImagesManager(self.oeb,
                                            self.docx.document_relationships)
        self.lists_manager = ListsManager(self.docx)
        self.fonts_manager = FontsManager(self.docx.namespace, self.oeb,
                                          self.opts)
        self.blocks = Blocks(self.docx.namespace, self.styles_manager,
                             self.links_manager)
        self.current_link = None

        for item in self.oeb.spine:
            self.process_item(item)

        # The same list object is handed to the finalizers below, exactly as
        # it was captured before skipped blocks are deleted.
        all_blocks = self.blocks.all_blocks
        self._prune_skipped_blocks(all_blocks)

        self.lists_manager.finalize(all_blocks)
        self.styles_manager.finalize(all_blocks)
        self.write()

    def _prune_skipped_blocks(self, all_blocks):
        """Delete blocks that resolve as skipped and flag the first survivor.

        Every block except the last one is asked to resolve its skipped state
        against its successor. Blocks reporting ``skipped`` are deleted
        back-to-front so earlier positions stay valid. Does nothing more when
        no blocks are left, instead of raising IndexError.
        """
        remove_positions = []
        for i, block in enumerate(all_blocks):
            try:
                nb = all_blocks[i + 1]
            except IndexError:
                break  # the last block has no successor to resolve against
            block.resolve_skipped(nb)
            if block.skipped:
                remove_positions.append(i)
        for pos in reversed(remove_positions):
            self.blocks.delete_block_at(pos)
        remaining = self.blocks.all_blocks
        if remaining:
            remaining[0].is_first_block = True

    def process_item(self, item):
        """Convert one spine item by processing each ``<body>`` found in it."""
        self.current_item = item
        # Reuse the stylizer the rasterizer cached for this item, if any
        stylizer = self.svg_rasterizer.stylizer_cache.get(item)
        if stylizer is None:
            stylizer = Stylizer(item.data,
                                item.href,
                                self.oeb,
                                self.opts,
                                self.opts.output_profile,
                                base_css=self.base_css)
        self.abshref = self.images_manager.abshref = item.abshref

        for i, body in enumerate(XPath('//h:body')(item.data)):
            with self.blocks:
                # Make sure the body carries an id, falling back to the links
                # manager's top anchor
                body.set('id',
                         body.get('id', None) or self.links_manager.top_anchor)
                self.process_tag(body, stylizer, is_first_tag=i == 0)

    def process_tag(self,
                    html_tag,
                    stylizer,
                    is_first_tag=False,
                    float_spec=None):
        """Convert *html_tag*, its element children and its tail text.

        The tag is routed to :meth:`add_inline_tag` or :meth:`add_block_tag`
        (or to the table methods of ``self.blocks``) based on its CSS
        ``display`` value. Children are then processed recursively.

        :param html_tag: element to convert
        :param stylizer: object providing ``style(tag)`` for computed styles
        :param is_first_tag: True for the first ``<body>`` of an item; such a
            tag is never treated as floating and its tail is ignored
        :param float_spec: float specification inherited from an ancestor
        """
        tagname = barename(html_tag.tag)
        if tagname in {'script', 'style', 'title', 'meta'}:
            return
        tag_style = stylizer.style(html_tag)
        if tag_style.is_hidden:
            return

        # Links apply to everything nested inside the <a>; restored below
        previous_link = self.current_link
        if tagname == 'a' and html_tag.get('href'):
            self.current_link = (self.current_item, html_tag.get('href'),
                                 html_tag.get('title'))

        display = tag_style._get('display')
        is_float = tag_style['float'] in {'left', 'right'} and not is_first_tag
        if float_spec is None and is_float:
            float_spec = FloatSpec(self.docx.namespace, html_tag, tag_style)

        # <br> has display:block but we dont want to start a new paragraph
        if display in {'inline', 'inline-block'} or tagname == 'br':
            if is_float and float_spec.is_dropcaps:
                self.add_block_tag(tagname,
                                   html_tag,
                                   tag_style,
                                   stylizer,
                                   float_spec=float_spec)
                float_spec = None
            else:
                self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
        elif display == 'list-item':
            self.add_block_tag(tagname,
                               html_tag,
                               tag_style,
                               stylizer,
                               is_list_item=True)
        elif display.startswith('table') or display == 'inline-table':
            if display == 'table-cell':
                self.blocks.start_new_cell(html_tag, tag_style)
                self.add_block_tag(tagname,
                                   html_tag,
                                   tag_style,
                                   stylizer,
                                   is_table_cell=True)
            elif display == 'table-row':
                self.blocks.start_new_row(html_tag, tag_style)
            elif display in {'table', 'inline-table'}:
                self.blocks.end_current_block()
                self.blocks.start_new_table(html_tag, tag_style)
        else:
            if tagname == 'img' and is_float:
                # Image is floating so dont start a new paragraph for it
                self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
            else:
                self.add_block_tag(tagname,
                                   html_tag,
                                   tag_style,
                                   stylizer,
                                   float_spec=float_spec)

        for child in html_tag.iterchildren('*'):
            self.process_tag(child, stylizer, float_spec=float_spec)

        # Must be read before finish_tag(), which is told the tag is done
        is_block = html_tag in self.blocks.open_html_blocks
        self.blocks.finish_tag(html_tag)
        if is_block and tag_style['page-break-after'] == 'avoid':
            self.blocks.all_blocks[-1].keep_next = True

        self.current_link = previous_link

        if display == 'table-row':
            return  # We ignore the tail for these tags

        ignore_whitespace_tail = is_block or display.startswith('table')
        if not is_first_tag and html_tag.tail and (
                not ignore_whitespace_tail or not html_tag.tail.isspace()):
            # Ignore trailing space after a block tag, as otherwise it will
            # become a new empty paragraph
            block = self.blocks.current_or_new_block(
                html_tag.getparent(), stylizer.style(html_tag.getparent()))
            block.add_text(html_tag.tail,
                           stylizer.style(html_tag.getparent()),
                           is_parent_style=True,
                           link=self.current_link)

    def add_block_tag(self,
                      tagname,
                      html_tag,
                      tag_style,
                      stylizer,
                      is_table_cell=False,
                      float_spec=None,
                      is_list_item=False):
        """Start a new block for *html_tag* and add its image or leading text.

        An ``id``/``name`` attribute on the tag becomes a bookmark on the new
        block.
        """
        block = self.blocks.start_new_block(html_tag,
                                            tag_style,
                                            is_table_cell=is_table_cell,
                                            float_spec=float_spec,
                                            is_list_item=is_list_item)
        anchor = html_tag.get('id') or html_tag.get('name')
        if anchor:
            block.bookmarks.add(self.bookmark_for_anchor(anchor, html_tag))
        if tagname == 'img':
            self.images_manager.add_image(html_tag,
                                          block,
                                          stylizer,
                                          as_block=True)
        else:
            if html_tag.text:
                block.add_text(html_tag.text,
                               tag_style,
                               ignore_leading_whitespace=True,
                               is_parent_style=True,
                               link=self.current_link)

    def add_inline_tag(self, tagname, html_tag, tag_style, stylizer):
        """Add a break, image or text run for *html_tag* to the parent's block.

        The block is obtained from ``self.blocks.current_or_new_block`` for
        the tag's parent. An ``id``/``name`` attribute on the tag is passed
        along as a bookmark.
        """
        anchor = html_tag.get('id') or html_tag.get('name') or None
        bmark = None
        if anchor:
            bmark = self.bookmark_for_anchor(anchor, html_tag)
        if tagname == 'br':
            # A <br> that is the last element child and has no tail adds nothing
            if html_tag.tail or html_tag is not tuple(
                    html_tag.getparent().iterchildren('*'))[-1]:
                block = self.blocks.current_or_new_block(
                    html_tag.getparent(), stylizer.style(html_tag.getparent()))
                block.add_break(clear={
                    'both': 'all',
                    'left': 'left',
                    'right': 'right'
                }.get(tag_style['clear'], 'none'),
                                bookmark=bmark)
        elif tagname == 'img':
            block = self.blocks.current_or_new_block(
                html_tag.getparent(), stylizer.style(html_tag.getparent()))
            self.images_manager.add_image(html_tag,
                                          block,
                                          stylizer,
                                          bookmark=bmark)
        else:
            # NOTE(review): when the tag has an anchor but no text, bmark is
            # never attached to any run here -- confirm whether links to such
            # empty anchors are expected to resolve.
            if html_tag.text:
                block = self.blocks.current_or_new_block(
                    html_tag.getparent(), stylizer.style(html_tag.getparent()))
                block.add_text(html_tag.text,
                               tag_style,
                               is_parent_style=False,
                               bookmark=bmark,
                               link=self.current_link)

    def bookmark_for_anchor(self, anchor, html_tag):
        """Return the links manager's bookmark for *anchor* in the current item."""
        return self.links_manager.bookmark_for_anchor(anchor,
                                                      self.current_item,
                                                      html_tag)

    def write(self):
        """Create the document skeleton and serialize all collected data."""
        self.docx.document, self.docx.styles, body = create_skeleton(self.opts)
        self.blocks.serialize(body)
        body.append(body[0])  # Move <sectPr> to the end
        self.styles_manager.serialize(self.docx.styles)
        self.images_manager.serialize(self.docx.images)
        self.fonts_manager.serialize(self.styles_manager.text_styles,
                                     self.docx.font_table,
                                     self.docx.embedded_fonts, self.docx.fonts)
        self.lists_manager.serialize(self.docx.numbering)

Пример #13

Показать файл

Файл: from_html.py Проект: v0re/calibre

class Convert(object):

    def __init__(self, oeb, docx):
        self.oeb, self.docx = oeb, docx
        self.log, self.opts = docx.log, docx.opts

        self.blocks = []

    def __call__(self):
        """Run the whole conversion.

        Steps, in order: run the SVG rasterizer over the book, create the
        styles/images/fonts managers, process every spine item, let the styles
        manager finalize against the collected blocks, then write the output.
        """
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        rasterizer = self.svg_rasterizer = SVGRasterizer()
        rasterizer(self.oeb, self.opts)

        oeb = self.oeb
        self.styles_manager = StylesManager()
        self.images_manager = ImagesManager(oeb, self.docx.document_relationships)
        self.fonts_manager = FontsManager(oeb)

        for spine_item in oeb.spine:
            self.process_item(spine_item)

        self.styles_manager.finalize(self.blocks)
        self.write()

    def process_item(self, item):
        """Convert one spine item into blocks.

        The stylizer is taken from the SVG rasterizer's cache when present,
        otherwise a new ``Stylizer`` is built for the item. The item's
        ``abshref`` is recorded on both this object and the images manager.
        Every element matched by ``//h:body`` gets its own ``Block`` (only the
        first one is flagged ``is_first_block``) and is processed with its tail
        ignored. Afterwards the very first collected block is dropped if it
        reports itself empty.
        """
        stylizer = self.svg_rasterizer.stylizer_cache.get(item)
        if stylizer is None:
            opts = self.opts
            stylizer = Stylizer(item.data, item.href, self.oeb, opts, opts.output_profile)

        abshref = item.abshref
        self.abshref = abshref
        self.images_manager.abshref = abshref

        for position, body in enumerate(XPath('//h:body')(item.data)):
            body_block = Block(
                self.styles_manager, body, stylizer.style(body),
                is_first_block=(position == 0))
            self.blocks.append(body_block)
            self.process_block(body, body_block, stylizer, ignore_tail=True)

        if self.blocks and self.blocks[0].is_empty():
            del self.blocks[0]

    def process_block(self, html_block, docx_block, stylizer, ignore_tail=False):
        block_style = stylizer.style(html_block)
        if block_style.is_hidden:
            return
        if html_block.tag.endswith('}img'):
            self.images_manager.add_image(html_block, docx_block, stylizer)
        else:
            if html_block.text:
                docx_block.add_text(html_block.text, block_style, ignore_leading_whitespace=True, is_parent_style=True)

            for child in html_block.iterchildren(etree.Element):
                tag = barename(child.tag)
                style = stylizer.style(child)
                display = style._get('display')
                if display == 'block' and tag != 'br':
                    if tag == 'img' and style['float'] in {'left', 'right'}:
                        # Image is floating so dont start a new paragraph for
                        # it
                        self.process_inline(child, self.blocks[-1], stylizer)
                    else:
                        b = Block(self.styles_manager, child, style)
                        self.blocks.append(b)
                        self.process_block(child, b, stylizer)
                else:
                    self.process_inline(child, self.blocks[-1], stylizer)

        if block_style['page-break-after'] == 'avoid':
            self.blocks[-1].keep_next = True

        if ignore_tail is False and html_block.tail and html_block.tail.strip():
            style = stylizer.style(html_block.getparent())
            b = Block(self.styles_manager, html_block.getparent(), style)
            self.blocks.append(b)
            b.add_text(html_block.tail, style, is_parent_style=True)

    def process_inline(self, html_child, docx_block, stylizer):
        """Add an inline element (and its tail text) to the document.

        Hidden elements are skipped, tail included. A ``br`` adds a break to
        *docx_block* when it has tail text or is not its parent's last child;
        an ``img`` goes to the images manager; any other element contributes
        its text and then its children, where a child whose display is
        ``block`` starts a new block and the rest recurse inline into the most
        recent block. Finally the element's tail text is added to the most
        recent block using the parent's style.
        """
        tag = barename(html_child.tag)
        style = stylizer.style(html_child)
        if style.is_hidden:
            return

        if tag == 'br':
            if html_child.tail or html_child is not html_child.getparent()[-1]:
                clear_values = {'both':'all', 'left':'left', 'right':'right'}
                clear = clear_values.get(style['clear'], 'none')
                docx_block.add_break(clear=clear)
        elif tag == 'img':
            self.images_manager.add_image(html_child, docx_block, stylizer)
        else:
            text = html_child.text
            if text:
                docx_block.add_text(text, style, html_parent=html_child)
            for child in html_child.iterchildren(etree.Element):
                child_style = stylizer.style(child)
                if child_style.get('display', 'inline') == 'block':
                    nested_block = Block(self.styles_manager, child, child_style)
                    self.blocks.append(nested_block)
                    self.process_block(child, nested_block, stylizer)
                else:
                    self.process_inline(child, self.blocks[-1], stylizer)

        tail = html_child.tail
        if tail:
            parent = html_child.getparent()
            self.blocks[-1].add_text(tail, stylizer.style(parent), html_parent=parent, is_parent_style=True)

    def write(self):
        """Build the document and styles XML trees and serialize the managers.

        The document tree gets a body holding every serialized block followed
        by a ``sectPr`` describing page size, margins, columns and grid. Page
        dimensions come from ``PAPER_SIZES`` unless a custom
        ``<width>x<height>`` size is set in the options; sizes and margins are
        multiplied by 20 before being written (presumably points to twentieths
        of a point - confirm). A styles tree with document defaults is created,
        then the styles, images and fonts managers serialize into ``self.docx``.
        """
        document_prefixes = {'w', 'r', 'm', 've', 'o', 'wp', 'w10', 'wne', 'a', 'pic'}
        dn = {prefix: uri for prefix, uri in namespaces.iteritems() if prefix in document_prefixes}
        E = ElementMaker(namespace=dn['w'], nsmap=dn)
        doc = E.document()
        self.docx.document = doc
        body = E.body()
        doc.append(body)
        for block in self.blocks:
            block.serialize(body)

        opts = self.opts
        width, height = PAPER_SIZES[opts.docx_page_size]
        custom_size = opts.docx_custom_page_size
        if custom_size is not None:
            # Keep the text on either side of the first 'x'.
            parts = custom_size.partition('x')
            width, height = float(parts[0]), float(parts[2])
        width = int(20 * width)
        height = int(20 * height)

        def margin(which):
            scaled = int(getattr(self.opts, 'margin_'+which) * 20)
            return w(which), str(scaled)

        page_size = E.pgSz(**{w('w'):str(width), w('h'):str(height)})
        page_margins = E.pgMar(**dict(margin(side) for side in 'left top right bottom'.split()))
        columns = E.cols(**{w('space'):'720'})
        grid = E.docGrid(**{w('linePitch'):"360"})
        body.append(E.sectPr(page_size, page_margins, columns, grid))

        style_prefixes = ('w', 'r', 'a', 'wp')
        dn = {prefix: uri for prefix, uri in namespaces.iteritems() if prefix in style_prefixes}
        E = ElementMaker(namespace=dn['w'], nsmap=dn)
        fonts = E.rFonts(**{w('asciiTheme'):"minorHAnsi", w('eastAsiaTheme'):"minorEastAsia", w('hAnsiTheme'):"minorHAnsi", w('cstheme'):"minorBidi"})
        font_size = E.sz(**{w('val'):'22'})
        complex_font_size = E.szCs(**{w('val'):'22'})
        language = E.lang(**{w('val'):'en-US', w('eastAsia'):"en-US", w('bidi'):"ar-SA"})
        run_defaults = E.rPrDefault(E.rPr(fonts, font_size, complex_font_size, language))
        spacing = E.spacing(**{w('after'):"0", w('line'):"276", w('lineRule'):"auto"})
        paragraph_defaults = E.pPrDefault(E.pPr(spacing))
        self.docx.styles = E.styles(E.docDefaults(run_defaults, paragraph_defaults))

        # The images manager fills this mapping during serialize().
        self.docx.images = {}
        self.styles_manager.serialize(self.docx.styles)
        self.images_manager.serialize(self.docx.images)
        self.fonts_manager.serialize(self.styles_manager.text_styles, self.docx.font_table, self.docx.embedded_fonts)