def convert_images(self, pages, opts, wide):
    '''
    Build a book containing one full-page image block per entry in `pages`
    and render it as LRF to ``opts.output``.

    @param pages: Iterable whose items are each passed to ``ImageStream``
                  (presumably image file paths -- confirm against caller).
    @param opts:  Options object; ``title``, ``author`` and ``output`` are read.
    @param wide:  If true the page is 784x1012, otherwise 584x754.
    '''
    from calibre.ebooks.lrf.pylrs.pylrs import Book, BookSetting, \
        ImageStream, ImageBlock
    from uuid import uuid4
    from calibre.constants import __appname__, __version__

    width, height = (784, 1012) if wide else (584, 754)

    # Page style with no margins so that the image fills the whole page.
    ps = {}
    ps['topmargin'] = 0
    ps['evensidemargin'] = 0
    ps['oddsidemargin'] = 0
    ps['textwidth'] = width
    ps['textheight'] = height
    book = Book(title=opts.title, author=opts.author,
                bookid=uuid4().hex,
                publisher='%s %s'%(__appname__, __version__),
                category=_('Comic'), pagestyledefault=ps,
                booksetting=BookSetting(screenwidth=width, screenheight=height))
    for page in pages:
        imageStream = ImageStream(page)
        _page = book.create_page()
        _page.append(ImageBlock(refstream=imageStream,
                                blockwidth=width, blockheight=height,
                                xsize=width, ysize=height,
                                x1=width, y1=height))
        book.append(_page)

    # Close the output file explicitly, even when rendering raises, instead
    # of relying on garbage collection to flush and release the handle.
    out = open(opts.output, 'wb')
    try:
        book.renderLrf(out)
    finally:
        out.close()
def convert_images(self, pages, opts, wide):
    '''
    Build a book containing one full-page image block per entry in `pages`
    and render it as LRF to ``opts.output``.

    @param pages: Iterable whose items are each passed to ``ImageStream``
                  (presumably image file paths -- confirm against caller).
    @param opts:  Options object; ``title``, ``author`` and ``output`` are read.
    @param wide:  If true the page is 784x1012, otherwise 584x754.
    '''
    from calibre.ebooks.lrf.pylrs.pylrs import Book, BookSetting, \
        ImageStream, ImageBlock
    from uuid import uuid4
    from calibre.constants import __appname__, __version__

    width, height = (784, 1012) if wide else (584, 754)

    # Page style with no margins so that the image fills the whole page.
    ps = {}
    ps['topmargin'] = 0
    ps['evensidemargin'] = 0
    ps['oddsidemargin'] = 0
    ps['textwidth'] = width
    ps['textheight'] = height
    book = Book(title=opts.title, author=opts.author,
                bookid=uuid4().hex,
                publisher='%s %s' % (__appname__, __version__),
                category=_('Comic'), pagestyledefault=ps,
                booksetting=BookSetting(screenwidth=width, screenheight=height))
    for page in pages:
        imageStream = ImageStream(page)
        _page = book.create_page()
        _page.append(
            ImageBlock(refstream=imageStream,
                       blockwidth=width, blockheight=height,
                       xsize=width, ysize=height,
                       x1=width, y1=height))
        book.append(_page)

    # Close the output file explicitly, even when rendering raises, instead
    # of relying on garbage collection to flush and release the handle.
    out = open(opts.output, 'wb')
    try:
        book.renderLrf(out)
    finally:
        out.close()
class LrsParser(object): SELF_CLOSING_TAGS = [ i.lower() for i in [ 'CR', 'Plot', 'NoBR', 'Space', 'PutObj', 'RuledLine', 'Plot', 'SetDefault', 'BookSetting', 'RegistFont', 'PageStyle', 'TextStyle', 'BlockStyle', 'JumpTo', 'ImageStream', 'Image' ] ] def __init__(self, stream, logger): self.logger = logger src = stream.read() self.soup = BeautifulStoneSoup( xml_to_unicode(src)[0], convertEntities=BeautifulStoneSoup.XML_ENTITIES, selfClosingTags=self.SELF_CLOSING_TAGS) self.objects = {} for obj in self.soup.findAll(objid=True): self.objects[obj['objid']] = obj self.parsed_objects = {} self.first_pass() self.second_pass() self.third_pass() self.fourth_pass() self.fifth_pass() def fifth_pass(self): for tag in self.soup.findAll(['canvas', 'header', 'footer']): canvas = self.parsed_objects[tag.get('objid')] for po in tag.findAll('putobj'): canvas.put_object(self.parsed_objects[po.get('refobj')], po.get('x1'), po.get('y1')) @classmethod def attrs_to_dict(cls, tag, exclude=('objid', )): result = {} for key, val in tag.attrs: if key in exclude: continue result[str(key)] = val return result def text_tag_to_element(self, tag): map = { 'span': Span, 'italic': Italic, 'bold': Bold, 'empline': EmpLine, 'sup': Sup, 'sub': Sub, 'cr': CR, 'drawchar': DropCaps, } if tag.name == 'charbutton': return CharButton(self.parsed_objects[tag.get('refobj')], None) if tag.name == 'plot': return Plot(self.parsed_objects[tag.get('refobj')], **self.attrs_to_dict(tag, ['refobj'])) settings = self.attrs_to_dict(tag) settings.pop('spanstyle', '') return map[tag.name](**settings) def process_text_element(self, tag, elem): for item in tag.contents: if isinstance(item, NavigableString): elem.append(item.string) else: subelem = self.text_tag_to_element(item) elem.append(subelem) self.process_text_element(item, subelem) def process_paragraph(self, tag): p = Paragraph() contents = [i for i in tag.contents] if contents: if isinstance(contents[0], NavigableString): contents[0] = contents[0].string.lstrip() 
for item in contents: if isinstance(item, basestring): p.append(item) elif isinstance(item, NavigableString): p.append(item.string) else: elem = self.text_tag_to_element(item) p.append(elem) self.process_text_element(item, elem) return p def process_text_block(self, tag): tb = self.parsed_objects[tag.get('objid')] for item in tag.contents: if hasattr(item, 'name'): if item.name == 'p': tb.append(self.process_paragraph(item)) elif item.name == 'cr': tb.append(CR()) elif item.name == 'charbutton': # BookDesigner does this p = Paragraph() tb.append(p) elem = self.text_tag_to_element(item) self.process_text_element(item, elem) p.append(elem) def fourth_pass(self): for tag in self.soup.findAll('page'): page = self.parsed_objects[tag.get('objid')] self.book.append(page) for block_tag in tag.findAll([ 'canvas', 'imageblock', 'textblock', 'ruledline', 'simpletextblock' ]): if block_tag.name == 'ruledline': page.append(RuledLine(**self.attrs_to_dict(block_tag))) else: page.append(self.parsed_objects[block_tag.get('objid')]) for tag in self.soup.find('objects').findAll('button'): jt = tag.find('jumpto') tb = self.parsed_objects[jt.get('refobj')] jb = JumpButton(tb) self.book.append(jb) self.parsed_objects[tag.get('objid')] = jb for tag in self.soup.findAll(['textblock', 'simpletextblock']): self.process_text_block(tag) toc = self.soup.find('toc') if toc: for tag in toc.findAll('toclabel'): label = self.tag_to_string(tag) self.book.addTocEntry(label, self.parsed_objects[tag.get('refobj')]) def third_pass(self): map = { 'page': (Page, [ 'pagestyle', 'evenfooterid', 'oddfooterid', 'evenheaderid', 'oddheaderid' ]), 'textblock': (TextBlock, ['textstyle', 'blockstyle']), 'simpletextblock': (TextBlock, ['textstyle', 'blockstyle']), 'imageblock': (ImageBlock, ['blockstyle', 'refstream']), 'image': (Image, ['refstream']), 'canvas': (Canvas, ['canvaswidth', 'canvasheight']), } attrmap = { 'pagestyle': 'pageStyle', 'blockstyle': 'blockStyle', 'textstyle': 'textStyle', } for id, tag in 
self.objects.items(): if tag.name in map.keys(): settings = self.attrs_to_dict( tag, map[tag.name][1] + ['objid', 'objlabel']) for a in ('pagestyle', 'blockstyle', 'textstyle'): label = tag.get(a, False) if label and \ (label in self._style_labels or label in self.parsed_objects): _obj = self.parsed_objects[label] if \ self.parsed_objects.has_key(label) else \ self._style_labels[label] settings[attrmap[a]] = _obj for a in ('evenfooterid', 'oddfooterid', 'evenheaderid', 'oddheaderid'): if tag.has_key(a): settings[a.replace( 'id', '')] = self.parsed_objects[tag.get(a)] args = [] if tag.has_key('refstream'): args.append(self.parsed_objects[tag.get('refstream')]) if tag.has_key('canvaswidth'): args += [tag.get('canvaswidth'), tag.get('canvasheight')] self.parsed_objects[id] = map[tag.name][0](*args, **settings) def second_pass(self): map = { 'pagestyle': (PageStyle, [ 'stylelabel', 'evenheaderid', 'oddheaderid', 'evenfooterid', 'oddfooterid' ]), 'textstyle': (TextStyle, ['stylelabel', 'rubyalignandadjust']), 'blockstyle': (BlockStyle, ['stylelabel']), 'imagestream': (ImageStream, ['imagestreamlabel']), 'registfont': (Font, []) } self._style_labels = {} for id, tag in self.objects.items(): if tag.name in map.keys(): settings = self.attrs_to_dict(tag, map[tag.name][1] + ['objid']) if tag.name == 'pagestyle': for a in ('evenheaderid', 'oddheaderid', 'evenfooterid', 'oddfooterid'): if tag.has_key(a): settings[a.replace( 'id', '')] = self.parsed_objects[tag.get(a)] settings.pop('autoindex', '') self.parsed_objects[id] = map[tag.name][0](**settings) x = tag.get('stylelabel', False) if x: self._style_labels[x] = self.parsed_objects[id] if tag.name == 'registfont': self.book.append(self.parsed_objects[id]) @classmethod def tag_to_string(cls, tag): ''' Convenience method to take a BeautifulSoup Tag and extract the text from it recursively. 
@return: A unicode (possibly empty) object ''' if not tag: return '' strings = [] for item in tag.contents: if isinstance(item, (NavigableString, CData)): strings.append(item.string) elif isinstance(item, Tag): res = cls.tag_to_string(item) if res: strings.append(res) return u''.join(strings) def first_pass(self): info = self.soup.find('bbebxylog').find('bookinformation').find('info') bookinfo = info.find('bookinfo') docinfo = info.find('docinfo') def me(base, tagname): tag = base.find(tagname.lower()) if tag is None: return ('', '', '') tag = (self.tag_to_string(tag), tag.get('reading') if tag.has_key('reading') else '') return tag title = me(bookinfo, 'Title') author = me(bookinfo, 'Author') publisher = me(bookinfo, 'Publisher') category = me(bookinfo, 'Category')[0] classification = me(bookinfo, 'Classification')[0] freetext = me(bookinfo, 'FreeText')[0] language = me(docinfo, 'Language')[0] creator = me(docinfo, 'Creator')[0] producer = me(docinfo, 'Producer')[0] bookid = me(bookinfo, 'BookID')[0] sd = self.soup.find('setdefault') sd = StyleDefault( **self.attrs_to_dict(sd, ['page_tree_id', 'rubyalignandadjust'])) bs = self.soup.find('booksetting') bs = BookSetting(**self.attrs_to_dict(bs, [])) settings = {} thumbnail = self.soup.find('cthumbnail') if thumbnail is not None: f = thumbnail['file'] if os.access(f, os.R_OK): settings['thumbnail'] = f else: print _('Could not read from thumbnail file:'), f self.book = Book(title=title, author=author, publisher=publisher, category=category, classification=classification, freetext=freetext, language=language, creator=creator, producer=producer, bookid=bookid, setdefault=sd, booksetting=bs, **settings) for hdr in self.soup.findAll(['header', 'footer']): elem = Header if hdr.name == 'header' else Footer self.parsed_objects[hdr.get('objid')] = elem( **self.attrs_to_dict(hdr)) def render(self, file, to_lrs=False): if to_lrs: self.book.renderLrs(file, 'utf-8') else: self.book.renderLrf(file)
class LrsParser(object): SELF_CLOSING_TAGS = [i.lower() for i in ['CR', 'Plot', 'NoBR', 'Space', 'PutObj', 'RuledLine', 'Plot', 'SetDefault', 'BookSetting', 'RegistFont', 'PageStyle', 'TextStyle', 'BlockStyle', 'JumpTo', 'ImageStream', 'Image']] def __init__(self, stream, logger): self.logger = logger src = stream.read() self.soup = BeautifulStoneSoup(xml_to_unicode(src)[0], convertEntities=BeautifulStoneSoup.XML_ENTITIES, selfClosingTags=self.SELF_CLOSING_TAGS) self.objects = {} for obj in self.soup.findAll(objid=True): self.objects[obj['objid']] = obj self.parsed_objects = {} self.first_pass() self.second_pass() self.third_pass() self.fourth_pass() self.fifth_pass() def fifth_pass(self): for tag in self.soup.findAll(['canvas', 'header', 'footer']): canvas = self.parsed_objects[tag.get('objid')] for po in tag.findAll('putobj'): canvas.put_object(self.parsed_objects[po.get('refobj')], po.get('x1'), po.get('y1')) @classmethod def attrs_to_dict(cls, tag, exclude=('objid',)): result = {} for key, val in tag.attrs: if key in exclude: continue result[str(key)] = val return result def text_tag_to_element(self, tag): map = { 'span' : Span, 'italic' : Italic, 'bold' : Bold, 'empline' : EmpLine, 'sup' : Sup, 'sub' : Sub, 'cr' : CR, 'drawchar': DropCaps, } if tag.name == 'charbutton': return CharButton(self.parsed_objects[tag.get('refobj')], None) if tag.name == 'plot': return Plot(self.parsed_objects[tag.get('refobj')], **self.attrs_to_dict(tag, ['refobj'])) settings = self.attrs_to_dict(tag) settings.pop('spanstyle', '') return map[tag.name](**settings) def process_text_element(self, tag, elem): for item in tag.contents: if isinstance(item, NavigableString): elem.append(item.string) else: subelem = self.text_tag_to_element(item) elem.append(subelem) self.process_text_element(item, subelem) def process_paragraph(self, tag): p = Paragraph() contents = [i for i in tag.contents] if contents: if isinstance(contents[0], NavigableString): contents[0] = contents[0].string.lstrip() 
for item in contents: if isinstance(item, basestring): p.append(item) elif isinstance(item, NavigableString): p.append(item.string) else: elem = self.text_tag_to_element(item) p.append(elem) self.process_text_element(item, elem) return p def process_text_block(self, tag): tb = self.parsed_objects[tag.get('objid')] for item in tag.contents: if hasattr(item, 'name'): if item.name == 'p': tb.append(self.process_paragraph(item)) elif item.name == 'cr': tb.append(CR()) elif item.name == 'charbutton': # BookDesigner does this p = Paragraph() tb.append(p) elem = self.text_tag_to_element(item) self.process_text_element(item, elem) p.append(elem) def fourth_pass(self): for tag in self.soup.findAll('page'): page = self.parsed_objects[tag.get('objid')] self.book.append(page) for block_tag in tag.findAll(['canvas', 'imageblock', 'textblock', 'ruledline', 'simpletextblock']): if block_tag.name == 'ruledline': page.append(RuledLine(**self.attrs_to_dict(block_tag))) else: page.append(self.parsed_objects[block_tag.get('objid')]) for tag in self.soup.find('objects').findAll('button'): jt = tag.find('jumpto') tb = self.parsed_objects[jt.get('refobj')] jb = JumpButton(tb) self.book.append(jb) self.parsed_objects[tag.get('objid')] = jb for tag in self.soup.findAll(['textblock', 'simpletextblock']): self.process_text_block(tag) toc = self.soup.find('toc') if toc: for tag in toc.findAll('toclabel'): label = self.tag_to_string(tag) self.book.addTocEntry(label, self.parsed_objects[tag.get('refobj')]) def third_pass(self): map = { 'page' : (Page, ['pagestyle', 'evenfooterid', 'oddfooterid', 'evenheaderid', 'oddheaderid']), 'textblock' : (TextBlock, ['textstyle', 'blockstyle']), 'simpletextblock' : (TextBlock, ['textstyle', 'blockstyle']), 'imageblock' : (ImageBlock, ['blockstyle', 'refstream']), 'image' : (Image, ['refstream']), 'canvas' : (Canvas, ['canvaswidth', 'canvasheight']), } attrmap = { 'pagestyle' : 'pageStyle', 'blockstyle' : 'blockStyle', 'textstyle' : 'textStyle', } for id, 
tag in self.objects.items(): if tag.name in map.keys(): settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid', 'objlabel']) for a in ('pagestyle', 'blockstyle', 'textstyle'): label = tag.get(a, False) if label and \ (label in self._style_labels or label in self.parsed_objects): _obj = self.parsed_objects[label] if \ self.parsed_objects.has_key(label) else \ self._style_labels[label] settings[attrmap[a]] = _obj for a in ('evenfooterid', 'oddfooterid', 'evenheaderid', 'oddheaderid'): if tag.has_key(a): settings[a.replace('id', '')] = self.parsed_objects[tag.get(a)] args = [] if tag.has_key('refstream'): args.append(self.parsed_objects[tag.get('refstream')]) if tag.has_key('canvaswidth'): args += [tag.get('canvaswidth'), tag.get('canvasheight')] self.parsed_objects[id] = map[tag.name][0](*args, **settings) def second_pass(self): map = { 'pagestyle' : (PageStyle, ['stylelabel', 'evenheaderid', 'oddheaderid', 'evenfooterid', 'oddfooterid']), 'textstyle' : (TextStyle, ['stylelabel', 'rubyalignandadjust']), 'blockstyle' : (BlockStyle, ['stylelabel']), 'imagestream': (ImageStream, ['imagestreamlabel']), 'registfont' : (Font, []) } self._style_labels = {} for id, tag in self.objects.items(): if tag.name in map.keys(): settings = self.attrs_to_dict(tag, map[tag.name][1]+['objid']) if tag.name == 'pagestyle': for a in ('evenheaderid', 'oddheaderid', 'evenfooterid', 'oddfooterid'): if tag.has_key(a): settings[a.replace('id', '')] = self.parsed_objects[tag.get(a)] settings.pop('autoindex', '') self.parsed_objects[id] = map[tag.name][0](**settings) x = tag.get('stylelabel', False) if x: self._style_labels[x] = self.parsed_objects[id] if tag.name == 'registfont': self.book.append(self.parsed_objects[id]) @classmethod def tag_to_string(cls, tag): ''' Convenience method to take a BeautifulSoup Tag and extract the text from it recursively. 
@return: A unicode (possibly empty) object ''' if not tag: return '' strings = [] for item in tag.contents: if isinstance(item, (NavigableString, CData)): strings.append(item.string) elif isinstance(item, Tag): res = cls.tag_to_string(item) if res: strings.append(res) return u''.join(strings) def first_pass(self): info = self.soup.find('bbebxylog').find('bookinformation').find('info') bookinfo = info.find('bookinfo') docinfo = info.find('docinfo') def me(base, tagname): tag = base.find(tagname.lower()) if tag is None: return ('', '', '') tag = (self.tag_to_string(tag), tag.get('reading') if tag.has_key('reading') else '') return tag title = me(bookinfo, 'Title') author = me(bookinfo, 'Author') publisher = me(bookinfo, 'Publisher') category = me(bookinfo, 'Category')[0] classification = me(bookinfo, 'Classification')[0] freetext = me(bookinfo, 'FreeText')[0] language = me(docinfo, 'Language')[0] creator = me(docinfo, 'Creator')[0] producer = me(docinfo, 'Producer')[0] bookid = me(bookinfo, 'BookID')[0] sd = self.soup.find('setdefault') sd = StyleDefault(**self.attrs_to_dict(sd, ['page_tree_id', 'rubyalignandadjust'])) bs = self.soup.find('booksetting') bs = BookSetting(**self.attrs_to_dict(bs, [])) settings = {} thumbnail = self.soup.find('cthumbnail') if thumbnail is not None: f = thumbnail['file'] if os.access(f, os.R_OK): settings['thumbnail'] = f else: print _('Could not read from thumbnail file:'), f self.book = Book(title=title, author=author, publisher=publisher, category=category, classification=classification, freetext=freetext, language=language, creator=creator, producer=producer, bookid=bookid, setdefault=sd, booksetting=bs, **settings) for hdr in self.soup.findAll(['header', 'footer']): elem = Header if hdr.name == 'header' else Footer self.parsed_objects[hdr.get('objid')] = elem(**self.attrs_to_dict(hdr)) def render(self, file, to_lrs=False): if to_lrs: self.book.renderLrs(file, 'utf-8') else: self.book.renderLrf(file)