示例#1
0
    def convert_images(self, pages, opts, wide):
        '''
        Build an LRF book with one full-screen image per page and write it
        to ``opts.output``.

        @param pages: iterable of items, each of which is passed unchanged to
                      ``ImageStream`` (presumably image file paths -- confirm
                      against the caller).
        @param opts: options object; ``title``, ``author`` and ``output`` are
                     read from it.
        @param wide: if true the screen size is 784x1012, otherwise 584x754.
        '''
        from calibre.ebooks.lrf.pylrs.pylrs import Book, BookSetting, ImageStream, ImageBlock
        from uuid import uuid4
        from calibre.constants import __appname__, __version__

        width, height = (784, 1012) if wide else (584, 754)

        # Page style: no margins, text area equal to the whole screen.
        ps = {}
        ps['topmargin']      = 0
        ps['evensidemargin'] = 0
        ps['oddsidemargin']  = 0
        ps['textwidth']      = width
        ps['textheight']     = height
        book = Book(title=opts.title, author=opts.author,
                bookid=uuid4().hex,
                publisher='%s %s'%(__appname__, __version__),
                category=_('Comic'), pagestyledefault=ps,
                booksetting=BookSetting(screenwidth=width, screenheight=height))
        for page in pages:
            imageStream = ImageStream(page)
            _page = book.create_page()
            _page.append(ImageBlock(refstream=imageStream,
                        blockwidth=width, blockheight=height, xsize=width,
                        ysize=height, x1=width, y1=height))
            book.append(_page)

        # Close the output file explicitly so the data is flushed and the
        # handle released even if rendering raises, instead of relying on
        # garbage collection of the anonymous file object.
        output = open(opts.output, 'wb')
        try:
            book.renderLrf(output)
        finally:
            output.close()
示例#2
0
    def convert_images(self, pages, opts, wide):
        '''
        Build an LRF book with one full-screen image per page and write it
        to ``opts.output``.

        @param pages: iterable of items, each of which is passed unchanged to
                      ``ImageStream`` (presumably image file paths -- confirm
                      against the caller).
        @param opts: options object; ``title``, ``author`` and ``output`` are
                     read from it.
        @param wide: if true the screen size is 784x1012, otherwise 584x754.
        '''
        from calibre.ebooks.lrf.pylrs.pylrs import Book, BookSetting, ImageStream, ImageBlock
        from uuid import uuid4
        from calibre.constants import __appname__, __version__

        width, height = (784, 1012) if wide else (584, 754)

        # Page style: no margins, text area equal to the whole screen.
        ps = {}
        ps['topmargin'] = 0
        ps['evensidemargin'] = 0
        ps['oddsidemargin'] = 0
        ps['textwidth'] = width
        ps['textheight'] = height
        book = Book(title=opts.title,
                    author=opts.author,
                    bookid=uuid4().hex,
                    publisher='%s %s' % (__appname__, __version__),
                    category=_('Comic'),
                    pagestyledefault=ps,
                    booksetting=BookSetting(screenwidth=width,
                                            screenheight=height))
        for page in pages:
            imageStream = ImageStream(page)
            _page = book.create_page()
            _page.append(
                ImageBlock(refstream=imageStream,
                           blockwidth=width,
                           blockheight=height,
                           xsize=width,
                           ysize=height,
                           x1=width,
                           y1=height))
            book.append(_page)

        # Close the output file explicitly so the data is flushed and the
        # handle released even if rendering raises, instead of relying on
        # garbage collection of the anonymous file object.
        output = open(opts.output, 'wb')
        try:
            book.renderLrf(output)
        finally:
            output.close()
示例#3
0
class LrsParser(object):

    SELF_CLOSING_TAGS = [
        i.lower() for i in [
            'CR', 'Plot', 'NoBR', 'Space', 'PutObj', 'RuledLine', 'Plot',
            'SetDefault', 'BookSetting', 'RegistFont', 'PageStyle',
            'TextStyle', 'BlockStyle', 'JumpTo', 'ImageStream', 'Image'
        ]
    ]

    def __init__(self, stream, logger):
        self.logger = logger
        src = stream.read()
        self.soup = BeautifulStoneSoup(
            xml_to_unicode(src)[0],
            convertEntities=BeautifulStoneSoup.XML_ENTITIES,
            selfClosingTags=self.SELF_CLOSING_TAGS)
        self.objects = {}
        for obj in self.soup.findAll(objid=True):
            self.objects[obj['objid']] = obj

        self.parsed_objects = {}
        self.first_pass()
        self.second_pass()
        self.third_pass()
        self.fourth_pass()
        self.fifth_pass()

    def fifth_pass(self):
        for tag in self.soup.findAll(['canvas', 'header', 'footer']):
            canvas = self.parsed_objects[tag.get('objid')]
            for po in tag.findAll('putobj'):
                canvas.put_object(self.parsed_objects[po.get('refobj')],
                                  po.get('x1'), po.get('y1'))

    @classmethod
    def attrs_to_dict(cls, tag, exclude=('objid', )):
        result = {}
        for key, val in tag.attrs:
            if key in exclude:
                continue
            result[str(key)] = val
        return result

    def text_tag_to_element(self, tag):
        map = {
            'span': Span,
            'italic': Italic,
            'bold': Bold,
            'empline': EmpLine,
            'sup': Sup,
            'sub': Sub,
            'cr': CR,
            'drawchar': DropCaps,
        }
        if tag.name == 'charbutton':
            return CharButton(self.parsed_objects[tag.get('refobj')], None)
        if tag.name == 'plot':
            return Plot(self.parsed_objects[tag.get('refobj')],
                        **self.attrs_to_dict(tag, ['refobj']))
        settings = self.attrs_to_dict(tag)
        settings.pop('spanstyle', '')
        return map[tag.name](**settings)

    def process_text_element(self, tag, elem):
        for item in tag.contents:
            if isinstance(item, NavigableString):
                elem.append(item.string)
            else:
                subelem = self.text_tag_to_element(item)
                elem.append(subelem)
                self.process_text_element(item, subelem)

    def process_paragraph(self, tag):
        p = Paragraph()
        contents = [i for i in tag.contents]
        if contents:
            if isinstance(contents[0], NavigableString):
                contents[0] = contents[0].string.lstrip()
            for item in contents:
                if isinstance(item, basestring):
                    p.append(item)
                elif isinstance(item, NavigableString):
                    p.append(item.string)
                else:
                    elem = self.text_tag_to_element(item)
                    p.append(elem)
                    self.process_text_element(item, elem)
        return p

    def process_text_block(self, tag):
        tb = self.parsed_objects[tag.get('objid')]
        for item in tag.contents:
            if hasattr(item, 'name'):
                if item.name == 'p':
                    tb.append(self.process_paragraph(item))
                elif item.name == 'cr':
                    tb.append(CR())
                elif item.name == 'charbutton':  # BookDesigner does this
                    p = Paragraph()
                    tb.append(p)
                    elem = self.text_tag_to_element(item)
                    self.process_text_element(item, elem)
                    p.append(elem)

    def fourth_pass(self):
        for tag in self.soup.findAll('page'):
            page = self.parsed_objects[tag.get('objid')]
            self.book.append(page)
            for block_tag in tag.findAll([
                    'canvas', 'imageblock', 'textblock', 'ruledline',
                    'simpletextblock'
            ]):
                if block_tag.name == 'ruledline':
                    page.append(RuledLine(**self.attrs_to_dict(block_tag)))
                else:
                    page.append(self.parsed_objects[block_tag.get('objid')])

        for tag in self.soup.find('objects').findAll('button'):
            jt = tag.find('jumpto')
            tb = self.parsed_objects[jt.get('refobj')]
            jb = JumpButton(tb)
            self.book.append(jb)
            self.parsed_objects[tag.get('objid')] = jb

        for tag in self.soup.findAll(['textblock', 'simpletextblock']):
            self.process_text_block(tag)
        toc = self.soup.find('toc')
        if toc:
            for tag in toc.findAll('toclabel'):
                label = self.tag_to_string(tag)
                self.book.addTocEntry(label,
                                      self.parsed_objects[tag.get('refobj')])

    def third_pass(self):
        map = {
            'page': (Page, [
                'pagestyle', 'evenfooterid', 'oddfooterid', 'evenheaderid',
                'oddheaderid'
            ]),
            'textblock': (TextBlock, ['textstyle', 'blockstyle']),
            'simpletextblock': (TextBlock, ['textstyle', 'blockstyle']),
            'imageblock': (ImageBlock, ['blockstyle', 'refstream']),
            'image': (Image, ['refstream']),
            'canvas': (Canvas, ['canvaswidth', 'canvasheight']),
        }
        attrmap = {
            'pagestyle': 'pageStyle',
            'blockstyle': 'blockStyle',
            'textstyle': 'textStyle',
        }
        for id, tag in self.objects.items():
            if tag.name in map.keys():
                settings = self.attrs_to_dict(
                    tag, map[tag.name][1] + ['objid', 'objlabel'])
                for a in ('pagestyle', 'blockstyle', 'textstyle'):
                    label = tag.get(a, False)
                    if label and \
                        (label in self._style_labels or label in self.parsed_objects):
                        _obj = self.parsed_objects[label] if \
                            self.parsed_objects.has_key(label) else \
                            self._style_labels[label]
                        settings[attrmap[a]] = _obj
                for a in ('evenfooterid', 'oddfooterid', 'evenheaderid',
                          'oddheaderid'):
                    if tag.has_key(a):
                        settings[a.replace(
                            'id', '')] = self.parsed_objects[tag.get(a)]
                args = []
                if tag.has_key('refstream'):
                    args.append(self.parsed_objects[tag.get('refstream')])
                if tag.has_key('canvaswidth'):
                    args += [tag.get('canvaswidth'), tag.get('canvasheight')]
                self.parsed_objects[id] = map[tag.name][0](*args, **settings)

    def second_pass(self):
        map = {
            'pagestyle': (PageStyle, [
                'stylelabel', 'evenheaderid', 'oddheaderid', 'evenfooterid',
                'oddfooterid'
            ]),
            'textstyle': (TextStyle, ['stylelabel', 'rubyalignandadjust']),
            'blockstyle': (BlockStyle, ['stylelabel']),
            'imagestream': (ImageStream, ['imagestreamlabel']),
            'registfont': (Font, [])
        }
        self._style_labels = {}
        for id, tag in self.objects.items():
            if tag.name in map.keys():
                settings = self.attrs_to_dict(tag,
                                              map[tag.name][1] + ['objid'])
                if tag.name == 'pagestyle':
                    for a in ('evenheaderid', 'oddheaderid', 'evenfooterid',
                              'oddfooterid'):
                        if tag.has_key(a):
                            settings[a.replace(
                                'id', '')] = self.parsed_objects[tag.get(a)]
                settings.pop('autoindex', '')
                self.parsed_objects[id] = map[tag.name][0](**settings)
                x = tag.get('stylelabel', False)
                if x:
                    self._style_labels[x] = self.parsed_objects[id]
                if tag.name == 'registfont':
                    self.book.append(self.parsed_objects[id])

    @classmethod
    def tag_to_string(cls, tag):
        '''
        Convenience method to take a BeautifulSoup Tag and extract the text from it
        recursively.
        @return: A unicode (possibly empty) object
        '''
        if not tag:
            return ''
        strings = []
        for item in tag.contents:
            if isinstance(item, (NavigableString, CData)):
                strings.append(item.string)
            elif isinstance(item, Tag):
                res = cls.tag_to_string(item)
                if res:
                    strings.append(res)
        return u''.join(strings)

    def first_pass(self):
        info = self.soup.find('bbebxylog').find('bookinformation').find('info')
        bookinfo = info.find('bookinfo')
        docinfo = info.find('docinfo')

        def me(base, tagname):
            tag = base.find(tagname.lower())
            if tag is None:
                return ('', '', '')
            tag = (self.tag_to_string(tag),
                   tag.get('reading') if tag.has_key('reading') else '')
            return tag

        title = me(bookinfo, 'Title')
        author = me(bookinfo, 'Author')
        publisher = me(bookinfo, 'Publisher')
        category = me(bookinfo, 'Category')[0]
        classification = me(bookinfo, 'Classification')[0]
        freetext = me(bookinfo, 'FreeText')[0]
        language = me(docinfo, 'Language')[0]
        creator = me(docinfo, 'Creator')[0]
        producer = me(docinfo, 'Producer')[0]
        bookid = me(bookinfo, 'BookID')[0]

        sd = self.soup.find('setdefault')
        sd = StyleDefault(
            **self.attrs_to_dict(sd, ['page_tree_id', 'rubyalignandadjust']))
        bs = self.soup.find('booksetting')
        bs = BookSetting(**self.attrs_to_dict(bs, []))

        settings = {}
        thumbnail = self.soup.find('cthumbnail')
        if thumbnail is not None:
            f = thumbnail['file']
            if os.access(f, os.R_OK):
                settings['thumbnail'] = f
            else:
                print _('Could not read from thumbnail file:'), f

        self.book = Book(title=title,
                         author=author,
                         publisher=publisher,
                         category=category,
                         classification=classification,
                         freetext=freetext,
                         language=language,
                         creator=creator,
                         producer=producer,
                         bookid=bookid,
                         setdefault=sd,
                         booksetting=bs,
                         **settings)

        for hdr in self.soup.findAll(['header', 'footer']):
            elem = Header if hdr.name == 'header' else Footer
            self.parsed_objects[hdr.get('objid')] = elem(
                **self.attrs_to_dict(hdr))

    def render(self, file, to_lrs=False):
        if to_lrs:
            self.book.renderLrs(file, 'utf-8')
        else:
            self.book.renderLrf(file)
示例#4
0
class LrsParser(object):

    SELF_CLOSING_TAGS = [i.lower() for i in ['CR', 'Plot', 'NoBR', 'Space',
                         'PutObj', 'RuledLine',
                         'Plot', 'SetDefault', 'BookSetting', 'RegistFont',
                         'PageStyle', 'TextStyle', 'BlockStyle', 'JumpTo',
                         'ImageStream', 'Image']]

    def __init__(self, stream, logger):
        self.logger = logger
        src = stream.read()
        self.soup = BeautifulStoneSoup(xml_to_unicode(src)[0],
                       convertEntities=BeautifulStoneSoup.XML_ENTITIES,
                       selfClosingTags=self.SELF_CLOSING_TAGS)
        self.objects = {}
        for obj in self.soup.findAll(objid=True):
            self.objects[obj['objid']] = obj

        self.parsed_objects = {}
        self.first_pass()
        self.second_pass()
        self.third_pass()
        self.fourth_pass()
        self.fifth_pass()

    def fifth_pass(self):
        for tag in self.soup.findAll(['canvas', 'header', 'footer']):
            canvas = self.parsed_objects[tag.get('objid')]
            for po in tag.findAll('putobj'):
                canvas.put_object(self.parsed_objects[po.get('refobj')],
                                  po.get('x1'), po.get('y1'))


    @classmethod
    def attrs_to_dict(cls, tag, exclude=('objid',)):
        result = {}
        for key, val in tag.attrs:
            if key in exclude:
                continue
            result[str(key)] = val
        return result

    def text_tag_to_element(self, tag):
        map = {
               'span'    : Span,
               'italic'  : Italic,
               'bold'    : Bold,
               'empline' : EmpLine,
               'sup'     : Sup,
               'sub'     : Sub,
               'cr'      : CR,
               'drawchar': DropCaps,
               }
        if tag.name == 'charbutton':
            return CharButton(self.parsed_objects[tag.get('refobj')], None)
        if tag.name == 'plot':
            return Plot(self.parsed_objects[tag.get('refobj')], **self.attrs_to_dict(tag, ['refobj']))
        settings = self.attrs_to_dict(tag)
        settings.pop('spanstyle', '')
        return map[tag.name](**settings)

    def process_text_element(self, tag, elem):
        for item in tag.contents:
            if isinstance(item, NavigableString):
                elem.append(item.string)
            else:
                subelem = self.text_tag_to_element(item)
                elem.append(subelem)
                self.process_text_element(item, subelem)


    def process_paragraph(self, tag):
        p = Paragraph()
        contents = [i for i in tag.contents]
        if contents:
            if isinstance(contents[0], NavigableString):
                contents[0] = contents[0].string.lstrip()
            for item in contents:
                if isinstance(item, basestring):
                    p.append(item)
                elif isinstance(item, NavigableString):
                    p.append(item.string)
                else:
                    elem = self.text_tag_to_element(item)
                    p.append(elem)
                    self.process_text_element(item, elem)
        return p

    def process_text_block(self, tag):
        tb = self.parsed_objects[tag.get('objid')]
        for item in tag.contents:
            if hasattr(item, 'name'):
                if item.name == 'p':
                    tb.append(self.process_paragraph(item))
                elif item.name == 'cr':
                    tb.append(CR())
                elif item.name == 'charbutton': # BookDesigner does this
                    p = Paragraph()
                    tb.append(p)
                    elem = self.text_tag_to_element(item)
                    self.process_text_element(item, elem)
                    p.append(elem)

    def fourth_pass(self):
        for tag in self.soup.findAll('page'):
            page = self.parsed_objects[tag.get('objid')]
            self.book.append(page)
            for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock',
                                          'ruledline', 'simpletextblock']):
                if block_tag.name == 'ruledline':
                    page.append(RuledLine(**self.attrs_to_dict(block_tag)))
                else:
                    page.append(self.parsed_objects[block_tag.get('objid')])

        for tag in self.soup.find('objects').findAll('button'):
            jt = tag.find('jumpto')
            tb = self.parsed_objects[jt.get('refobj')]
            jb = JumpButton(tb)
            self.book.append(jb)
            self.parsed_objects[tag.get('objid')] = jb

        for tag in self.soup.findAll(['textblock', 'simpletextblock']):
            self.process_text_block(tag)
        toc = self.soup.find('toc')
        if toc:
            for tag in toc.findAll('toclabel'):
                label = self.tag_to_string(tag)
                self.book.addTocEntry(label, self.parsed_objects[tag.get('refobj')])


    def third_pass(self):
        map = {
               'page'       : (Page, ['pagestyle', 'evenfooterid',
                                      'oddfooterid', 'evenheaderid', 'oddheaderid']),
               'textblock'  : (TextBlock, ['textstyle', 'blockstyle']),
               'simpletextblock'  : (TextBlock, ['textstyle', 'blockstyle']),
               'imageblock' : (ImageBlock, ['blockstyle', 'refstream']),
               'image'      : (Image, ['refstream']),
               'canvas'     : (Canvas, ['canvaswidth', 'canvasheight']),
               }
        attrmap = {
                   'pagestyle'  : 'pageStyle',
                   'blockstyle' : 'blockStyle',
                   'textstyle'  : 'textStyle',
                   }
        for id, tag in self.objects.items():
            if tag.name in map.keys():
                settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid', 'objlabel'])
                for a in ('pagestyle', 'blockstyle', 'textstyle'):
                    label = tag.get(a, False)
                    if label and \
                        (label in self._style_labels or label in self.parsed_objects):
                        _obj = self.parsed_objects[label] if \
                            self.parsed_objects.has_key(label) else \
                            self._style_labels[label]
                        settings[attrmap[a]] = _obj
                for a in ('evenfooterid', 'oddfooterid', 'evenheaderid', 'oddheaderid'):
                    if tag.has_key(a):
                        settings[a.replace('id', '')] = self.parsed_objects[tag.get(a)]
                args = []
                if tag.has_key('refstream'):
                    args.append(self.parsed_objects[tag.get('refstream')])
                if tag.has_key('canvaswidth'):
                    args += [tag.get('canvaswidth'), tag.get('canvasheight')]
                self.parsed_objects[id] = map[tag.name][0](*args, **settings)



    def second_pass(self):
        map = {
               'pagestyle'  : (PageStyle, ['stylelabel', 'evenheaderid', 'oddheaderid', 'evenfooterid', 'oddfooterid']),
               'textstyle'  : (TextStyle, ['stylelabel', 'rubyalignandadjust']),
               'blockstyle' : (BlockStyle, ['stylelabel']),
               'imagestream': (ImageStream, ['imagestreamlabel']),
               'registfont' : (Font, [])
               }
        self._style_labels = {}
        for id, tag in self.objects.items():
            if tag.name in map.keys():
                settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid'])
                if tag.name == 'pagestyle':
                    for a in ('evenheaderid', 'oddheaderid', 'evenfooterid', 'oddfooterid'):
                        if tag.has_key(a):
                            settings[a.replace('id', '')] = self.parsed_objects[tag.get(a)]
                settings.pop('autoindex', '')
                self.parsed_objects[id] = map[tag.name][0](**settings)
                x = tag.get('stylelabel', False)
                if x:
                    self._style_labels[x] = self.parsed_objects[id]
                if tag.name == 'registfont':
                    self.book.append(self.parsed_objects[id])


    @classmethod
    def tag_to_string(cls, tag):
        '''
        Convenience method to take a BeautifulSoup Tag and extract the text from it
        recursively.
        @return: A unicode (possibly empty) object
        '''
        if not tag:
            return ''
        strings = []
        for item in tag.contents:
            if isinstance(item, (NavigableString, CData)):
                strings.append(item.string)
            elif isinstance(item, Tag):
                res = cls.tag_to_string(item)
                if res:
                    strings.append(res)
        return u''.join(strings)

    def first_pass(self):
        info = self.soup.find('bbebxylog').find('bookinformation').find('info')
        bookinfo = info.find('bookinfo')
        docinfo  = info.find('docinfo')

        def me(base, tagname):
            tag = base.find(tagname.lower())
            if tag is None:
                return ('', '', '')
            tag = (self.tag_to_string(tag), tag.get('reading') if tag.has_key('reading') else '')
            return tag

        title          = me(bookinfo, 'Title')
        author         = me(bookinfo, 'Author')
        publisher      = me(bookinfo, 'Publisher')
        category       = me(bookinfo, 'Category')[0]
        classification = me(bookinfo, 'Classification')[0]
        freetext       = me(bookinfo, 'FreeText')[0]
        language       = me(docinfo, 'Language')[0]
        creator        = me(docinfo, 'Creator')[0]
        producer       = me(docinfo, 'Producer')[0]
        bookid         = me(bookinfo, 'BookID')[0]

        sd = self.soup.find('setdefault')
        sd = StyleDefault(**self.attrs_to_dict(sd, ['page_tree_id', 'rubyalignandadjust']))
        bs = self.soup.find('booksetting')
        bs = BookSetting(**self.attrs_to_dict(bs, []))

        settings = {}
        thumbnail = self.soup.find('cthumbnail')
        if thumbnail is not None:
            f = thumbnail['file']
            if os.access(f, os.R_OK):
                settings['thumbnail'] = f
            else:
                print _('Could not read from thumbnail file:'), f

        self.book = Book(title=title, author=author, publisher=publisher,
                         category=category, classification=classification,
                         freetext=freetext, language=language, creator=creator,
                         producer=producer, bookid=bookid, setdefault=sd,
                         booksetting=bs, **settings)

        for hdr in self.soup.findAll(['header', 'footer']):
            elem = Header if hdr.name == 'header' else Footer
            self.parsed_objects[hdr.get('objid')] = elem(**self.attrs_to_dict(hdr))

    def render(self, file, to_lrs=False):
        if to_lrs:
            self.book.renderLrs(file, 'utf-8')
        else:
            self.book.renderLrf(file)