def create_indices(self):
    """Build the skeleton, chunk and NCX index records for this book.

    ``self.skel_records`` and ``self.chunk_records`` are always set, from
    ``self.skel_table`` and ``self.chunk_table`` respectively.
    ``self.ncx_records`` starts out as an empty list and is only replaced
    when ``self.oeb.toc.count()`` is at least 1; otherwise a warning is
    logged and the method returns early without touching ``self.has_tbs``.

    Side effect on the ToC: every child node that is visited gets
    ``ncx_parent`` (the ``id()`` of its parent node) and ``ncx_hlvl`` (its
    nesting depth) attributes assigned.
    """
    self.skel_records = SkelIndex(self.skel_table)()
    self.chunk_records = ChunkIndex(self.chunk_table)()
    self.ncx_records = []
    toc = self.oeb.toc
    is_periodical = self.opts.mobi_periodical
    if toc.count() < 1:
        self.log.warn('Document has no ToC, MOBI will have no NCX index')
        return

    # Flatten the ToC into a depth first list, one dict per ToC node.
    entries = []
    for play_order, node in enumerate(toc.iterdescendants()):
        entry = {
            'id': id(node),
            'index': play_order,
            'label': node.title or _('Unknown'),
            'children': [],
            'depth': getattr(node, 'ncx_hlvl', 0),
        }
        parent_id = getattr(node, 'ncx_parent', None)
        if parent_id is not None:
            entry['parent_id'] = parent_id

        # Tag the children now so that their own entries (built later in
        # this same traversal) know their parent and depth.
        node_id, depth, child_ids = entry['id'], entry['depth'], entry['children']
        for child in node:
            child.ncx_parent = node_id
            child.ncx_hlvl = depth + 1
            child_ids.append(id(child))

        if is_periodical:
            # Only non-empty metadata is copied into the entry.
            for field in ('author', 'description'):
                value = getattr(node, field)
                if value:
                    entry[field] = value

        # Resolve the target: try the exact (file, fragment) anchor first,
        # then the start of the file.
        path, sep, frag = (node.href or '').partition('#')
        aid = self.id_map.get((path, frag))
        if aid is None:
            aid = self.id_map.get((path, ''))
        if aid is None:
            # Unresolvable target: point at the very first chunk.
            pos, fid = 0, 0
            offset = self.chunk_table[pos].insert_pos + fid
        else:
            pos, fid, offset = self.aid_offset_map[aid]
        entry['pos_fid'] = (pos, fid)
        entry['offset'] = offset
        entries.append(entry)

    # The Kindle requires entries to be sorted by (depth, playorder).
    # However, I cannot figure out how to deal with non-linear ToCs, i.e.
    # ToCs whose nth entry at depth d has an offset after its n+k entry at
    # the same depth, so we sort on (depth, offset) instead. This re-orders
    # the ToC to be linear. A non-linear ToC causes section to section
    # jumping to not work. kindlegen somehow handles non-linear ToCs, but I
    # cannot figure out how.
    def by_play_order(e):
        return (e['depth'], e['index'])

    def by_offset(e):
        return (e['depth'], e['offset'])

    original = sorted(entries, key=by_play_order)
    linearized = sorted(entries, key=by_offset)
    is_non_linear = original != linearized
    entries = linearized
    is_non_linear = False  # False as we are using the linearized entries

    if is_non_linear:
        for entry in entries:
            entry['kind'] = 'chapter'

    # Renumber the entries to match their new order and remember where each
    # ToC node ended up.
    id_to_index = {}
    for position, entry in enumerate(entries):
        entry['index'] = position
        id_to_index[entry['id']] = position

    # Write the hierarchical information
    for entry in entries:
        child_ids = entry.pop('children')
        if child_ids:
            entry['first_child'] = id_to_index[child_ids[0]]
            entry['last_child'] = id_to_index[child_ids[-1]]
        # 'parent_id' is only ever stored with a non-None value, so None
        # reliably means "no parent recorded".
        parent_id = entry.pop('parent_id', None)
        if parent_id is not None:
            entry['parent'] = id_to_index[parent_id]

    # Write the lengths: an entry runs up to the closest later offset that
    # belongs to an entry at the same or a shallower depth, or to the end
    # of the first flow when there is none.
    for entry in entries:
        start, depth = entry['offset'], entry['depth']
        later_starts = [
            other['offset'] for other in entries
            if other['offset'] > start and other['depth'] <= depth
        ]
        if later_starts:
            end = min(later_starts)
        else:
            end = len(self.flows[0])
        entry['length'] = end - start

    self.has_tbs = apply_trailing_byte_sequences(
        entries, self.records, self.uncompressed_record_lengths)

    if is_non_linear:
        idx_type = NonLinearNCXIndex
    else:
        idx_type = NCXIndex
    self.ncx_records = idx_type(entries)()
def create_indices(self):
    """Build the skeleton, chunk and NCX index records for this book.

    ``self.skel_records`` and ``self.chunk_records`` are always set, from
    ``self.skel_table`` and ``self.chunk_table`` respectively.
    ``self.ncx_records`` starts out as an empty list and is only replaced
    when ``self.oeb.toc.count()`` is at least 2; otherwise a warning is
    logged and the method returns early without touching ``self.has_tbs``.

    Side effect on the ToC: every child node that is visited gets
    ``ncx_parent`` (the ``id()`` of its parent node) and ``ncx_hlvl`` (its
    nesting depth) attributes assigned.
    """
    self.skel_records = SkelIndex(self.skel_table)()
    self.chunk_records = ChunkIndex(self.chunk_table)()
    self.ncx_records = []
    toc = self.oeb.toc
    is_periodical = self.opts.mobi_periodical
    # A ToC whose count is below two is treated the same as no ToC at all.
    if toc.count() < 2:
        self.log.warn('Document has no ToC, MOBI will have no NCX index')
        return

    # Flatten the ToC into a depth first list, one dict per ToC node.
    entries = []
    for play_order, node in enumerate(toc.iterdescendants()):
        entry = {
            'id': id(node),
            'index': play_order,
            'label': node.title or _('Unknown'),
            'children': [],
            'depth': getattr(node, 'ncx_hlvl', 0),
        }
        parent_id = getattr(node, 'ncx_parent', None)
        if parent_id is not None:
            entry['parent_id'] = parent_id

        # Tag the children now so that their own entries (built later in
        # this same traversal) know their parent and depth.
        node_id, depth, child_ids = entry['id'], entry['depth'], entry['children']
        for child in node:
            child.ncx_parent = node_id
            child.ncx_hlvl = depth + 1
            child_ids.append(id(child))

        if is_periodical:
            # Only non-empty metadata is copied into the entry.
            for field in ('author', 'description'):
                value = getattr(node, field)
                if value:
                    entry[field] = value

        # Resolve the target: try the exact (file, fragment) anchor first,
        # then the start of the file.
        path, sep, frag = (node.href or '').partition('#')
        aid = self.id_map.get((path, frag))
        if aid is None:
            aid = self.id_map.get((path, ''))
        if aid is None:
            # Unresolvable target: point at the very first chunk.
            pos, fid = 0, 0
            offset = self.chunk_table[pos].insert_pos + fid
        else:
            pos, fid, offset = self.aid_offset_map[aid]
        entry['pos_fid'] = (pos, fid)
        entry['offset'] = offset
        entries.append(entry)

    # The Kindle requires entries to be sorted by (depth, playorder).
    # However, I cannot figure out how to deal with non-linear ToCs, i.e.
    # ToCs whose nth entry at depth d has an offset after its n+k entry at
    # the same depth, so we sort on (depth, offset) instead. This re-orders
    # the ToC to be linear. A non-linear ToC causes section to section
    # jumping to not work. kindlegen somehow handles non-linear ToCs, but I
    # cannot figure out how.
    def by_play_order(e):
        return (e['depth'], e['index'])

    def by_offset(e):
        return (e['depth'], e['offset'])

    original = sorted(entries, key=by_play_order)
    linearized = sorted(entries, key=by_offset)
    is_non_linear = original != linearized
    entries = linearized
    is_non_linear = False  # False as we are using the linearized entries

    if is_non_linear:
        for entry in entries:
            entry['kind'] = 'chapter'

    # Renumber the entries to match their new order and remember where each
    # ToC node ended up.
    id_to_index = {}
    for position, entry in enumerate(entries):
        entry['index'] = position
        id_to_index[entry['id']] = position

    # Write the hierarchical information
    for entry in entries:
        child_ids = entry.pop('children')
        if child_ids:
            entry['first_child'] = id_to_index[child_ids[0]]
            entry['last_child'] = id_to_index[child_ids[-1]]
        # 'parent_id' is only ever stored with a non-None value, so None
        # reliably means "no parent recorded".
        parent_id = entry.pop('parent_id', None)
        if parent_id is not None:
            entry['parent'] = id_to_index[parent_id]

    # Write the lengths: an entry runs up to the closest later offset that
    # belongs to an entry at the same or a shallower depth, or to the end
    # of the first flow when there is none.
    for entry in entries:
        start, depth = entry['offset'], entry['depth']
        later_starts = [
            other['offset'] for other in entries
            if other['offset'] > start and other['depth'] <= depth
        ]
        if later_starts:
            end = min(later_starts)
        else:
            end = len(self.flows[0])
        entry['length'] = end - start

    self.has_tbs = apply_trailing_byte_sequences(
        entries, self.records, self.uncompressed_record_lengths)

    if is_non_linear:
        idx_type = NonLinearNCXIndex
    else:
        idx_type = NCXIndex
    self.ncx_records = idx_type(entries)()