def decompress_text(self, number): if self.header_record.compression == 1: return self.section_data(number) if self.header_record.compression == 2 or self.header_record.compression == 258: from calibre.ebooks.compression.palmdoc import decompress_doc return decompress_doc(self.section_data(number)) return ''
def decompress_text(self, number): if self.header_record.compression == 1: return self.section_data(number) if self.header_record.compression == 2 or self.header_record.compression == 258: from calibre.ebooks.compression.palmdoc import decompress_doc return decompress_doc(self.section_data(number)) return b''
def decompress_phtml(self, data): if self.header_record.compression == 2: if self.owner_id: raise NotImplementedError return zlib.decompress(data) elif self.header_record.compression == 1: from calibre.ebooks.compression.palmdoc import decompress_doc return decompress_doc(data)
def decompress_text(self, number): if self.header_record.compression == 2: from calibre.ebooks.compression.palmdoc import decompress_doc return decompress_doc(self.section_data(number)).decode( 'cp1252' if self.encoding is None else self.encoding, 'replace') if self.header_record.compression == 10: return zlib.decompress(self.section_data(number)).decode( 'cp1252' if self.encoding is None else self.encoding, 'replace')
def decompress_text(self, number): if self.header_record.compression == 2: from calibre.ebooks.compression.palmdoc import decompress_doc return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace') if self.header_record.compression == 10: return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
def extract_content(self, output_dir): # Each text record is independent (unless the continuation # value is set in the previous record). Put each converted # text recored into a separate file. We will reference the # home.html file as the first file and let the HTML input # plugin assemble the order based on hyperlinks. with CurrentDir(output_dir): for uid, num in self.uid_text_secion_number.items(): self.log.debug('Writing record with uid: %s as %s.html' % (uid, uid)) with open('%s.html' % uid, 'wb') as htmlf: html = u'<html><body>' section_header, section_data = self.sections[num] if section_header.type == DATATYPE_PHTML: html += self.process_phtml(section_data.data, section_data.header.paragraph_offsets) elif section_header.type == DATATYPE_PHTML_COMPRESSED: d = self.decompress_phtml(section_data.data) html += self.process_phtml(d, section_data.header.paragraph_offsets).decode(self.get_text_uid_encoding(section_header.uid), 'replace') html += '</body></html>' htmlf.write(html.encode('utf-8')) # Images. # Cache the image sizes in case they are used by a composite image. images = set() if not os.path.exists(os.path.join(output_dir, 'images/')): os.makedirs(os.path.join(output_dir, 'images/')) with CurrentDir(os.path.join(output_dir, 'images/')): # Single images. for uid, num in self.uid_image_section_number.items(): section_header, section_data = self.sections[num] if section_data: idata = None if section_header.type == DATATYPE_TBMP: idata = section_data elif section_header.type == DATATYPE_TBMP_COMPRESSED: if self.header_record.compression == 1: idata = decompress_doc(section_data) elif self.header_record.compression == 2: idata = zlib.decompress(section_data) try: save_cover_data_to(idata, '%s.jpg' % uid, compression_quality=70) images.add(uid) self.log.debug('Wrote image with uid %s to images/%s.jpg' % (uid, uid)) except Exception as e: self.log.error('Failed to write image with uid %s: %s' % (uid, e)) else: self.log.error('Failed to write image with uid %s: No data.' % uid) # Composite images. # We're going to use the already compressed .jpg images here. for uid, num in self.uid_composite_image_section_number.items(): try: section_header, section_data = self.sections[num] # Get the final width and height. width = 0 height = 0 for row in section_data.layout: row_width = 0 col_height = 0 for col in row: if col not in images: raise Exception('Image with uid: %s missing.' % col) w, h = identify(lopen('%s.jpg' % col, 'rb'))[1:] row_width += w if col_height < h: col_height = h if width < row_width: width = row_width height += col_height # Create a new image the total size of all image # parts. Put the parts into the new image. with Canvas(width, height) as canvas: y_off = 0 for row in section_data.layout: x_off = 0 largest_height = 0 for col in row: im = image_from_data(lopen('%s.jpg' % col, 'rb').read()) canvas.compose(im, x_off, y_off) w, h = im.width(), im.height() x_off += w if largest_height < h: largest_height = h y_off += largest_height with lopen('%s.jpg' % uid) as out: out.write(canvas.export(compression_quality=70)) self.log.debug('Wrote composite image with uid %s to images/%s.jpg' % (uid, uid)) except Exception as e: self.log.error('Failed to write composite image with uid %s: %s' % (uid, e)) # Run the HTML through the html processing plugin. from calibre.customize.ui import plugin_for_input_format html_input = plugin_for_input_format('html') for opt in html_input.options: setattr(self.options, opt.option.name, opt.recommended_value) self.options.input_encoding = 'utf-8' odi = self.options.debug_pipeline self.options.debug_pipeline = None # Determine the home.html record uid. This should be set in the # reserved values in the metadata recored. home.html is the first # text record (should have hyper link references to other records) # in the document. try: home_html = self.header_record.home_html if not home_html: home_html = self.uid_text_secion_number.items()[0][0] except: raise Exception('Could not determine home.html') # Generate oeb from html conversion. oeb = html_input.convert(open('%s.html' % home_html, 'rb'), self.options, 'html', self.log, {}) self.options.debug_pipeline = odi return oeb
def decompress_text(self, number): from calibre.ebooks.compression.palmdoc import decompress_doc data = bytearray(self.section_data(number)) data = bytes(bytearray(x ^ 0xA5 for x in data)) return decompress_doc(data).decode(self.encoding or 'cp1252', 'replace')
def extract_content(self, output_dir): # Each text record is independent (unless the continuation # value is set in the previous record). Put each converted # text recored into a separate file. We will reference the # home.html file as the first file and let the HTML input # plugin assemble the order based on hyperlinks. with CurrentDir(output_dir): for uid, num in self.uid_text_secion_number.items(): self.log.debug('Writing record with uid: %s as %s.html' % (uid, uid)) with open('%s.html' % uid, 'wb') as htmlf: html = u'<html><body>' section_header, section_data = self.sections[num] if section_header.type == DATATYPE_PHTML: html += self.process_phtml( section_data.data, section_data.header.paragraph_offsets) elif section_header.type == DATATYPE_PHTML_COMPRESSED: d = self.decompress_phtml(section_data.data) html += self.process_phtml( d, section_data.header.paragraph_offsets).decode( self.get_text_uid_encoding(section_header.uid), 'replace') html += '</body></html>' htmlf.write(html.encode('utf-8')) # Images. # Cache the image sizes in case they are used by a composite image. images = set() if not os.path.exists(os.path.join(output_dir, 'images/')): os.makedirs(os.path.join(output_dir, 'images/')) with CurrentDir(os.path.join(output_dir, 'images/')): # Single images. for uid, num in self.uid_image_section_number.items(): section_header, section_data = self.sections[num] if section_data: idata = None if section_header.type == DATATYPE_TBMP: idata = section_data elif section_header.type == DATATYPE_TBMP_COMPRESSED: if self.header_record.compression == 1: idata = decompress_doc(section_data) elif self.header_record.compression == 2: idata = zlib.decompress(section_data) try: save_cover_data_to(idata, '%s.jpg' % uid, compression_quality=70) images.add(uid) self.log.debug( 'Wrote image with uid %s to images/%s.jpg' % (uid, uid)) except Exception as e: self.log.error( 'Failed to write image with uid %s: %s' % (uid, e)) else: self.log.error( 'Failed to write image with uid %s: No data.' % uid) # Composite images. # We're going to use the already compressed .jpg images here. for uid, num in self.uid_composite_image_section_number.items(): try: section_header, section_data = self.sections[num] # Get the final width and height. width = 0 height = 0 for row in section_data.layout: row_width = 0 col_height = 0 for col in row: if col not in images: raise Exception('Image with uid: %s missing.' % col) w, h = identify(lopen('%s.jpg' % col, 'rb'))[1:] row_width += w if col_height < h: col_height = h if width < row_width: width = row_width height += col_height # Create a new image the total size of all image # parts. Put the parts into the new image. with Canvas(width, height) as canvas: y_off = 0 for row in section_data.layout: x_off = 0 largest_height = 0 for col in row: im = image_from_data( lopen('%s.jpg' % col, 'rb').read()) canvas.compose(im, x_off, y_off) w, h = im.width(), im.height() x_off += w if largest_height < h: largest_height = h y_off += largest_height with lopen('%s.jpg' % uid) as out: out.write(canvas.export(compression_quality=70)) self.log.debug( 'Wrote composite image with uid %s to images/%s.jpg' % (uid, uid)) except Exception as e: self.log.error( 'Failed to write composite image with uid %s: %s' % (uid, e)) # Run the HTML through the html processing plugin. from calibre.customize.ui import plugin_for_input_format html_input = plugin_for_input_format('html') for opt in html_input.options: setattr(self.options, opt.option.name, opt.recommended_value) self.options.input_encoding = 'utf-8' odi = self.options.debug_pipeline self.options.debug_pipeline = None # Determine the home.html record uid. This should be set in the # reserved values in the metadata recored. home.html is the first # text record (should have hyper link references to other records) # in the document. try: home_html = self.header_record.home_html if not home_html: home_html = self.uid_text_secion_number.items()[0][0] except: raise Exception('Could not determine home.html') # Generate oeb from html conversion. oeb = html_input.convert(open('%s.html' % home_html, 'rb'), self.options, 'html', self.log, {}) self.options.debug_pipeline = odi return oeb
def decompress_text(self, number): from calibre.ebooks.compression.palmdoc import decompress_doc return decompress_doc(''.join([ chr(ord(x) ^ 0xA5) for x in self.section_data(number) ])).decode('cp1252' if self.encoding is None else self.encoding, 'replace') # noqa
def decompress_text(self, number): from calibre.ebooks.compression.palmdoc import decompress_doc return decompress_doc(''.join([chr(ord(x) ^ 0xA5) for x in self.section_data(number)])).decode('cp1252' if self.encoding is None else self.encoding, 'replace')