def __call__(self, **kwargs):
    positions = {}
    for name, val in kwargs.iteritems():
        if name not in self:
            raise KeyError('Not a valid header field: %r' % name)
        self[name] = val

    buf = BytesIO()
    buf.write(bytes(self.HEADER_NAME))
    for name, val in self.iteritems():
        val = self.format_value(name, val)
        positions[name] = buf.tell()
        if val is None:
            raise ValueError('Dynamic field %r not set' % name)
        if isinstance(val, (int, long)):
            fmt = b'H' if name in self.SHORT_FIELDS else b'I'
            val = pack(b'>' + fmt, val)
        buf.write(val)

    for pos_field, field in self.POSITIONS.iteritems():
        buf.seek(positions[pos_field])
        buf.write(pack(b'>I', positions[field]))

    ans = buf.getvalue()
    if self.ALIGN_BLOCK:
        ans = align_block(ans)
    return ans
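# Illustrative sketch, not calibre code: the method above records the byte
# position of every field it writes and then uses the POSITIONS map to seek
# back and overwrite each "pointer" field with the position of the field it
# refers to. A minimal standalone version of that pattern, with made-up field
# names and a stand-in align_block, looks like this:

from collections import OrderedDict
from io import BytesIO
from struct import pack


def demo_align_block(raw, multiple=4, pad=b'\0'):
    # Pad raw out to a multiple of `multiple` bytes, like align_block above
    extra = len(raw) % multiple
    return raw if extra == 0 else raw + pad * (multiple - extra)


def demo_serialize_header(fields, short_fields=frozenset(), positions=None,
                          header_name=b'DEMO'):
    positions = positions or {}
    written_at = {}
    buf = BytesIO()
    buf.write(header_name)
    for name, val in fields.items():
        written_at[name] = buf.tell()
        fmt = b'>H' if name in short_fields else b'>I'
        buf.write(pack(fmt, val))
    # Back-patch: each pointer field gets the byte position of its target field
    for pointer_field, target_field in positions.items():
        buf.seek(written_at[pointer_field])
        buf.write(pack(b'>I', written_at[target_field]))
    return demo_align_block(buf.getvalue())


# Example: 'payload_offset' ends up holding 12, the position of 'payload'
demo_fields = OrderedDict([('length', 0), ('payload_offset', 0), ('payload', 0xdeadbeef)])
demo_raw = demo_serialize_header(demo_fields, positions={'payload_offset': 'payload'})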
def create_index_record(self, secondary=False):  # {{{
    header_length = 192
    buf = io.BytesIO()
    indices = list(SecondaryIndexEntry.entries()) if secondary else self.indices

    # Write index entries
    offsets = []
    for i in indices:
        offsets.append(buf.tell())
        buf.write(i.bytestring)
    index_block = align_block(buf.getvalue())

    # Write offsets to index entries as an IDXT block
    idxt_block = b'IDXT'
    buf.seek(0), buf.truncate(0)
    for offset in offsets:
        buf.write(pack(b'>H', header_length + offset))
    idxt_block = align_block(idxt_block + buf.getvalue())
    body = index_block + idxt_block

    header = b'INDX'
    buf.seek(0), buf.truncate(0)
    buf.write(pack(b'>I', header_length))
    buf.write(b'\0' * 4)  # Unknown
    buf.write(pack(b'>I', 1))  # Header type? Or index record number?
    buf.write(b'\0' * 4)  # Unknown
    # IDXT block offset
    buf.write(pack(b'>I', header_length + len(index_block)))
    # Number of index entries
    buf.write(pack(b'>I', len(offsets)))
    # Unknown
    buf.write(b'\xff' * 8)
    # Unknown
    buf.write(b'\0' * 156)
    header += buf.getvalue()

    ans = header + body
    if len(ans) > 0x10000:
        raise ValueError('Too many entries (%d) in the TOC' % len(offsets))
    return ans
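# Debugging sketch, not part of calibre: a reader for the record layout that
# create_index_record() produces. The field positions (header length at bytes
# 4-8, IDXT offset and entry count at bytes 20-28) come straight from the
# pack() calls above; the rest of the 192-byte header is left alone.

from struct import unpack


def parse_index_record(raw):
    if raw[:4] != b'INDX':
        raise ValueError('Not an INDX record')
    header_length, = unpack(b'>I', raw[4:8])
    idxt_offset, num_entries = unpack(b'>II', raw[20:28])
    if raw[idxt_offset:idxt_offset + 4] != b'IDXT':
        raise ValueError('IDXT block is not at the recorded offset')
    # Each IDXT slot is a big-endian 16-bit offset (from the start of the
    # record) to one index entry
    fmt = b'>%dH' % num_entries
    entry_offsets = unpack(fmt, raw[idxt_offset + 4:idxt_offset + 4 + 2 * num_entries])
    return header_length, list(entry_offsets)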
def __call__(self):
    self.control_bytes = self.calculate_control_bytes_for_each_entry(self.entries)

    index_blocks, idxt_blocks, record_counts, last_indices = [BytesIO()], [BytesIO()], [0], [b'']
    buf = BytesIO()

    RECORD_LIMIT = 0x10000 - self.HEADER_LENGTH - 1048  # kindlegen uses 1048 (there has to be some margin because of block alignment)

    for i, (index_num, tags) in enumerate(self.entries):
        control_bytes = self.control_bytes[i]
        buf.seek(0), buf.truncate(0)
        index_num = (index_num.encode('utf-8') if isinstance(index_num, unicode) else index_num)
        raw = bytearray(index_num)
        raw.insert(0, len(index_num))
        buf.write(bytes(raw))
        buf.write(bytes(bytearray(control_bytes)))
        for tag in self.tag_types:
            values = tags.get(tag.name, None)
            if values is None:
                continue
            try:
                len(values)
            except TypeError:
                values = [values]
            if values:
                for val in values:
                    try:
                        buf.write(encint(val))
                    except ValueError:
                        raise ValueError('Invalid values for %r: %r' % (tag, values))
        raw = buf.getvalue()
        offset = index_blocks[-1].tell()
        idxt_pos = idxt_blocks[-1].tell()
        if offset + idxt_pos + len(raw) + 2 > RECORD_LIMIT:
            index_blocks.append(BytesIO())
            idxt_blocks.append(BytesIO())
            record_counts.append(0)
            offset = idxt_pos = 0
            last_indices.append(b'')
        record_counts[-1] += 1
        idxt_blocks[-1].write(pack(b'>H', self.HEADER_LENGTH + offset))
        index_blocks[-1].write(raw)
        last_indices[-1] = index_num

    index_records = []
    for index_block, idxt_block, record_count in zip(index_blocks, idxt_blocks, record_counts):
        index_block = align_block(index_block.getvalue())
        idxt_block = align_block(b'IDXT' + idxt_block.getvalue())

        # Create header for this index record
        header = b'INDX'
        buf.seek(0), buf.truncate(0)
        buf.write(pack(b'>I', self.HEADER_LENGTH))
        buf.write(b'\0' * 4)  # Unknown
        buf.write(pack(b'>I', 1))  # Header type (0 for Index header record and 1 for Index records)
        buf.write(b'\0' * 4)  # Unknown
        # IDXT block offset
        buf.write(pack(b'>I', self.HEADER_LENGTH + len(index_block)))
        # Number of index entries in this record
        buf.write(pack(b'>I', record_count))
        buf.write(b'\xff' * 8)  # Unknown
        buf.write(b'\0' * 156)  # Unknown
        header += buf.getvalue()
        index_records.append(header + index_block + idxt_block)
        if len(index_records[-1]) > 0x10000:
            raise ValueError('Failed to rollover index blocks for very large index.')

    # Create the Index Header record
    tagx = self.generate_tagx()

    # Geometry of the index records is written as index entries pointed to
    # by the IDXT records
    buf.seek(0), buf.truncate()
    idxt = [b'IDXT']
    pos = IndexHeader.HEADER_LENGTH + len(tagx)
    for last_idx, num in zip(last_indices, record_counts):
        start = buf.tell()
        idxt.append(pack(b'>H', pos))
        buf.write(bytes(bytearray([len(last_idx)])) + last_idx)
        buf.write(pack(b'>H', num))
        pos += buf.tell() - start

    header = {
        'num_of_entries': sum(r for r in record_counts),
        'num_of_records': len(index_records),
        'num_of_cncx': len(self.cncx),
        'tagx': align_block(tagx),
        'geometry': align_block(buf.getvalue()),
        'idxt': align_block(b''.join(idxt)),
    }
    header = IndexHeader()(**header)
    self.records = [header] + index_records
    self.records.extend(self.cncx.records)
    return self.records
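# The rollover bookkeeping above can be hard to follow inside the
# serialization loop, so here is the same arithmetic pulled out into a small,
# testable sketch (not calibre code). entry_sizes stands for the len(raw) of
# each rendered entry; header_length is assumed to be the 192-byte INDX header
# used elsewhere in this file, and 1048 is the kindlegen margin noted above.

def split_into_records(entry_sizes, header_length=192):
    record_limit = 0x10000 - header_length - 1048
    records = [[]]
    index_used = 0  # bytes the current index block would consume
    idxt_used = 0   # bytes the current IDXT block would consume (2 per entry)
    for size in entry_sizes:
        if index_used + idxt_used + size + 2 > record_limit:
            records.append([])
            index_used = idxt_used = 0
        records[-1].append(size)
        index_used += size
        idxt_used += 2
    return records


# e.g. 10000 entries of 30 bytes each roll over into 5 index records
assert len(split_into_records([30] * 10000)) == 5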
def __call__(self):
    self.control_bytes = self.calculate_control_bytes_for_each_entry(self.entries)
    rendered_entries = []
    index, idxt, buf = BytesIO(), BytesIO(), BytesIO()
    IndexEntry = namedtuple('IndexEntry', 'offset length raw')
    last_lead_text = b''
    too_large = ValueError('Index has too many entries, calibre does not'
                           ' support generating multiple index records at this'
                           ' time.')

    for i, x in enumerate(self.entries):
        control_bytes = self.control_bytes[i]
        leading_text, tags = x
        buf.seek(0), buf.truncate(0)
        leading_text = (leading_text.encode('utf-8') if isinstance(leading_text, unicode) else leading_text)
        raw = bytearray(leading_text)
        raw.insert(0, len(leading_text))
        buf.write(bytes(raw))
        buf.write(bytes(bytearray(control_bytes)))
        for tag in self.tag_types:
            values = tags.get(tag.name, None)
            if values is None:
                continue
            try:
                len(values)
            except TypeError:
                values = [values]
            if values:
                for val in values:
                    try:
                        buf.write(encint(val))
                    except ValueError:
                        raise ValueError('Invalid values for %r: %r' % (tag, values))
        raw = buf.getvalue()
        offset = index.tell()
        if offset + self.HEADER_LENGTH >= 0x10000:
            raise too_large
        rendered_entries.append(IndexEntry(offset, len(raw), raw))
        idxt.write(pack(b'>H', self.HEADER_LENGTH + offset))
        index.write(raw)
        last_lead_text = leading_text

    index_block = align_block(index.getvalue())
    idxt_block = align_block(b'IDXT' + idxt.getvalue())
    body = index_block + idxt_block
    if len(body) + self.HEADER_LENGTH >= 0x10000:
        raise too_large

    header = b'INDX'
    buf.seek(0), buf.truncate(0)
    buf.write(pack(b'>I', self.HEADER_LENGTH))
    buf.write(b'\0' * 4)  # Unknown
    buf.write(pack(b'>I', 1))  # Header type? Or index record number?
    buf.write(b'\0' * 4)  # Unknown
    # IDXT block offset
    buf.write(pack(b'>I', self.HEADER_LENGTH + len(index_block)))
    # Number of index entries
    buf.write(pack(b'>I', len(rendered_entries)))
    buf.write(b'\xff' * 8)  # Unknown
    buf.write(b'\0' * 156)  # Unknown
    header += buf.getvalue()

    index_record = header + body

    tagx = self.generate_tagx()
    idxt = (b'IDXT' + pack(b'>H', IndexHeader.HEADER_LENGTH + len(tagx)) + b'\0')
    # Last index
    idx = bytes(bytearray([len(last_lead_text)])) + last_lead_text
    idx += pack(b'>H', len(rendered_entries))

    header = {
        'num_of_entries': len(rendered_entries),
        'num_of_cncx': len(self.cncx),
        'tagx': tagx,
        'last_index': align_block(idx),
        'idxt': idxt,
    }
    header = IndexHeader()(**header)
    self.records = [header, index_record]
    self.records.extend(self.cncx.records)
    return self.records
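# For reference, a sketch of how one index entry is laid out by the loop
# above: a length-prefixed leading-text string, the control bytes, then each
# tag value as a MOBI forward-encoded variable-width integer. The encint here
# is a reimplementation for illustration only; calibre's own lives in
# calibre.ebooks.mobi.utils and is what the code above actually calls.

def demo_encint(value):
    # 7 bits per byte, most significant group first, high bit set on the last byte
    if value < 0:
        raise ValueError('Cannot encode negative numbers')
    byts = bytearray()
    while True:
        byts.append(value & 0x7f)
        value >>= 7
        if value == 0:
            break
    byts.reverse()
    byts[-1] |= 0x80
    return bytes(byts)


def demo_render_entry(leading_text, control_bytes, tag_values):
    if not isinstance(leading_text, bytes):
        leading_text = leading_text.encode('utf-8')
    raw = bytearray([len(leading_text)]) + leading_text
    raw += bytearray(control_bytes)
    for val in tag_values:
        raw += demo_encint(val)
    return bytes(raw)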
def generate_record0(self):  # MOBI header {{{
    metadata = self.oeb.metadata
    bt = 0x002
    if self.primary_index_record_idx is not None:
        if False and self.indexer.is_flat_periodical:
            # Disabled as setting this to 0x102 causes the Kindle to not
            # auto archive the issues
            bt = 0x102
        elif self.indexer.is_periodical:
            # If you change this, remember to change the cdetype in the EXTH
            # header as well
            bt = 0x103 if self.indexer.is_flat_periodical else 0x101

    from calibre.ebooks.mobi.writer8.exth import build_exth
    exth = build_exth(metadata,
        prefer_author_sort=self.opts.prefer_author_sort,
        is_periodical=self.is_periodical,
        share_not_sync=self.opts.share_not_sync,
        cover_offset=self.cover_offset,
        thumbnail_offset=self.thumbnail_offset,
        start_offset=self.serializer.start_offset,
        mobi_doctype=bt
    )

    first_image_record = None
    if self.resources:
        used_images = self.serializer.used_images
        first_image_record = len(self.records)
        self.resources.serialize(self.records, used_images)
    last_content_record = len(self.records) - 1

    # FCIS/FLIS (Seems to serve no purpose)
    flis_number = len(self.records)
    self.records.append(FLIS)
    fcis_number = len(self.records)
    self.records.append(fcis(self.text_length))

    # EOF record
    self.records.append(b'\xE9\x8E\x0D\x0A')

    record0 = io.BytesIO()
    # The MOBI Header
    record0.write(pack(b'>HHIHHHH',
        self.compression,  # compression type
        0,  # Unused
        self.text_length,  # Text length
        self.last_text_record_idx,  # Number of text records or last tr idx
        RECORD_SIZE,  # Text record size
        0,  # Unused
        0  # Unused
    ))  # 0 - 15 (0x0 - 0xf)
    uid = random.randint(0, 0xffffffff)
    title = normalize(unicode_type(metadata.title[0])).encode('utf-8')

    # 0x0 - 0x3
    record0.write(b'MOBI')

    # 0x4 - 0x7   : Length of header
    # 0x8 - 0x11  : MOBI type
    #   type    meaning
    #   0x002   MOBI book (chapter - chapter navigation)
    #   0x101   News - Hierarchical navigation with sections and articles
    #   0x102   News feed - Flat navigation
    #   0x103   News magazine - same as 0x101
    # 0xC - 0xF   : Text encoding (65001 is utf-8)
    # 0x10 - 0x13 : UID
    # 0x14 - 0x17 : Generator version
    record0.write(pack(b'>IIIII', 0xe8, bt, 65001, uid, 6))

    # 0x18 - 0x1f : Unknown
    record0.write(b'\xff' * 8)

    # 0x20 - 0x23 : Secondary index record
    sir = 0xffffffff
    if (self.primary_index_record_idx is not None and
            self.indexer.secondary_record_offset is not None):
        sir = (self.primary_index_record_idx +
                self.indexer.secondary_record_offset)
    record0.write(pack(b'>I', sir))

    # 0x24 - 0x3f : Unknown
    record0.write(b'\xff' * 28)

    # 0x40 - 0x43 : Offset of first non-text record
    record0.write(pack(b'>I', self.first_non_text_record_idx))

    # 0x44 - 0x4b : title offset, title length
    record0.write(pack(b'>II', 0xe8 + 16 + len(exth), len(title)))

    # 0x4c - 0x4f : Language specifier
    record0.write(iana2mobi(unicode_type(metadata.language[0])))

    # 0x50 - 0x57 : Input language and Output language
    record0.write(b'\0' * 8)

    # 0x58 - 0x5b : Format version
    # 0x5c - 0x5f : First image record number
    record0.write(pack(b'>II', 6,
        first_image_record if first_image_record else len(self.records)))

    # 0x60 - 0x63 : First HUFF/CDIC record number
    # 0x64 - 0x67 : Number of HUFF/CDIC records
    # 0x68 - 0x6b : First DATP record number
    # 0x6c - 0x6f : Number of DATP records
    record0.write(b'\0' * 16)

    # 0x70 - 0x73 : EXTH flags
    # Bit 6 (0b1000000) being set indicates the presence of an EXTH header
    # Bit 12 being set indicates the presence of embedded fonts
    # The purpose of the other bits is unknown
    exth_flags = 0b1010000
    if self.is_periodical:
        exth_flags |= 0b1000
    if self.resources.has_fonts:
        exth_flags |= 0b1000000000000
    record0.write(pack(b'>I', exth_flags))

    # 0x74 - 0x93 : Unknown
    record0.write(b'\0' * 32)

    # 0x94 - 0x97 : DRM offset
    # 0x98 - 0x9b : DRM count
    # 0x9c - 0x9f : DRM size
    # 0xa0 - 0xa3 : DRM flags
    record0.write(pack(b'>IIII', 0xffffffff, 0xffffffff, 0, 0))

    # 0xa4 - 0xaf : Unknown
    record0.write(b'\0' * 12)

    # 0xb0 - 0xb1 : First content record number
    # 0xb2 - 0xb3 : last content record number
    # (Includes Image, DATP, HUFF, DRM)
    record0.write(pack(b'>HH', 1, last_content_record))

    # 0xb4 - 0xb7 : Unknown
    record0.write(b'\0\0\0\x01')

    # 0xb8 - 0xbb : FCIS record number
    record0.write(pack(b'>I', fcis_number))

    # 0xbc - 0xbf : Unknown (FCIS record count?)
    record0.write(pack(b'>I', 1))

    # 0xc0 - 0xc3 : FLIS record number
    record0.write(pack(b'>I', flis_number))

    # 0xc4 - 0xc7 : Unknown (FLIS record count?)
    record0.write(pack(b'>I', 1))

    # 0xc8 - 0xcf : Unknown
    record0.write(b'\0' * 8)

    # 0xd0 - 0xdf : Unknown
    record0.write(pack(b'>IIII', 0xffffffff, 0, 0xffffffff, 0xffffffff))

    # 0xe0 - 0xe3 : Extra record data
    # Extra record data flags:
    #   - 0b1  : <extra multibyte bytes><size>
    #   - 0b10 : <TBS indexing description of this HTML record><size>
    #   - 0b100: <uncrossable breaks><size>
    # Setting bit 2 (0x2) disables <guide><reference type="start"> functionality
    extra_data_flags = 0b1  # Has multibyte overlap bytes
    if self.primary_index_record_idx is not None:
        extra_data_flags |= 0b10
    if WRITE_UNCROSSABLE_BREAKS:
        extra_data_flags |= 0b100
    record0.write(pack(b'>I', extra_data_flags))

    # 0xe4 - 0xe7 : Primary index record
    record0.write(pack(b'>I', 0xffffffff if self.primary_index_record_idx
        is None else self.primary_index_record_idx))

    record0.write(exth)
    record0.write(title)
    record0 = record0.getvalue()
    # Add some buffer so that Amazon can add encryption information if this
    # MOBI is submitted for publication
    record0 += (b'\0' * (1024 * 8))
    self.records[0] = align_block(record0)
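# Debugging sketch, not part of calibre: read the title back out of a record0
# built by generate_record0(). The offsets mirror the writes above: the first
# pack() emits a 16-byte PalmDOC header, the MOBI header starts at the b'MOBI'
# magic at byte 16, and the title offset/length pair sits at 0x44-0x4b within
# that header (0x54-0x5b from the start of the record). The stored title
# offset is relative to the start of the record, so it can be sliced directly.

from struct import unpack


def read_title(record0):
    if record0[16:20] != b'MOBI':
        raise ValueError('Not a MOBI header record')
    title_offset, title_length = unpack(b'>II', record0[16 + 0x44:16 + 0x4c])
    return record0[title_offset:title_offset + title_length].decode('utf-8')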
def create_header(self, secondary=False):  # {{{
    buf = io.BytesIO()
    if secondary:
        tagx_block = TAGX().secondary
    else:
        tagx_block = (TAGX().periodical if self.is_periodical else
                      TAGX().flat_book)
    header_length = 192

    # Ident 0 - 4
    buf.write(b'INDX')

    # Header length 4 - 8
    buf.write(pack(b'>I', header_length))

    # Unknown 8-16
    buf.write(b'\0' * 8)

    # Index type: 0 - normal, 2 - inflection 16 - 20
    buf.write(pack(b'>I', 2))

    # IDXT offset 20-24
    buf.write(pack(b'>I', 0))  # Filled in later

    # Number of index records 24-28
    buf.write(pack(b'>I', 1 if secondary else len(self.records)))

    # Index Encoding 28-32
    buf.write(pack(b'>I', 65001))  # utf-8

    # Unknown 32-36
    buf.write(b'\xff' * 4)

    # Number of index entries 36-40
    indices = list(SecondaryIndexEntry.entries()) if secondary else self.indices
    buf.write(pack(b'>I', len(indices)))

    # ORDT offset 40-44
    buf.write(pack(b'>I', 0))

    # LIGT offset 44-48
    buf.write(pack(b'>I', 0))

    # Number of LIGT entries 48-52
    buf.write(pack(b'>I', 0))

    # Number of CNCX records 52-56
    buf.write(pack(b'>I', 0 if secondary else len(self.cncx.records)))

    # Unknown 56-180
    buf.write(b'\0' * 124)

    # TAGX offset 180-184
    buf.write(pack(b'>I', header_length))

    # Unknown 184-192
    buf.write(b'\0' * 8)

    # TAGX block
    buf.write(tagx_block)

    num = len(indices)

    # The index of the last entry in the NCX
    idx = indices[-1].index
    if isinstance(idx, numbers.Integral):
        idx = encode_number_as_hex(idx)
    else:
        idx = idx.encode('ascii')
    idx = (bytes(bytearray([len(idx)]))) + idx
    buf.write(idx)

    # The number of entries in the NCX
    buf.write(pack(b'>H', num))

    # Padding
    pad = (4 - (buf.tell() % 4)) % 4
    if pad:
        buf.write(b'\0' * pad)

    idxt_offset = buf.tell()

    buf.write(b'IDXT')
    buf.write(pack(b'>H', header_length + len(tagx_block)))
    buf.write(b'\0')

    buf.seek(20)
    buf.write(pack(b'>I', idxt_offset))

    return align_block(buf.getvalue())
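# Sketch of a consistency check for the header record produced above, not
# part of calibre: the IDXT offset back-patched at bytes 20-24 must point at
# the b'IDXT' trailer, and the single 16-bit IDXT slot written there points at
# the length-prefixed last-index block (last NCX index plus entry count) that
# was written just after the TAGX block. All positions follow directly from
# the writes in create_header().

from struct import unpack


def read_last_index(raw):
    idxt_offset, = unpack(b'>I', raw[20:24])
    if raw[idxt_offset:idxt_offset + 4] != b'IDXT':
        raise ValueError('IDXT trailer is not at the back-patched offset')
    entry_offset, = unpack(b'>H', raw[idxt_offset + 4:idxt_offset + 6])
    name_len = bytearray(raw[entry_offset:entry_offset + 1])[0]
    last_index = raw[entry_offset + 1:entry_offset + 1 + name_len]
    num_entries, = unpack(b'>H', raw[entry_offset + 1 + name_len:entry_offset + 3 + name_len])
    return last_index, num_entries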