def parseNCX(self): indx_data = [] tag_fieldname_map = { 1: ['pos',0], 2: ['len',0], 3: ['noffs',0], 4: ['hlvl',0], 5: ['koffs',0], 6: ['pos_fid',0], 21: ['parent',0], 22: ['child1',0], 23: ['childn',0] } if self.ncxidx != 0xffffffff: outtbl, ctoc_text = self.mi.getIndexData(self.ncxidx, "NCX") if DEBUG_NCX: print ctoc_text print outtbl num = 0 for [text, tagMap] in outtbl: tmp = { 'name': text, 'pos': -1, 'len': 0, 'noffs': -1, 'text' : "Unknown Text", 'hlvl' : -1, 'kind' : "Unknown Kind", 'pos_fid' : None, 'parent' : -1, 'child1' : -1, 'childn' : -1, 'num' : num } for tag in tag_fieldname_map.keys(): [fieldname, i] = tag_fieldname_map[tag] if tag in tagMap: fieldvalue = tagMap[tag][i] if tag == 6: pos_fid = toBase32(fieldvalue,4) fieldvalue2 = tagMap[tag][i+1] pos_off = toBase32(fieldvalue2,10) fieldvalue = 'kindle:pos:fid:%s:off:%s' % (pos_fid, pos_off) tmp[fieldname] = fieldvalue if tag == 3: toctext = ctoc_text.get(fieldvalue, 'Unknown Text') if self.mh.codec != 'utf-8': toctext = unicode(toctext, self.mh.codec).encode('utf-8') tmp['text'] = toctext if tag == 5: kindtext = ctoc_text.get(fieldvalue, 'Unknown Kind') if self.mh.codec != 'utf-8': kindtext = unicode(kindtext, self.mh.codec).encode('utf-8') tmp['kind'] = kindtext indx_data.append(tmp) if DEBUG_NCX: print "record number: ", num print "name: ", tmp['name'], print "position", tmp['pos']," length: ", tmp['len'] print "text: ", tmp['text'] print "kind: ", tmp['kind'] print "heading level: ", tmp['hlvl'] print "parent:", tmp['parent'] print "first child: ",tmp['child1']," last child: ", tmp['childn'] print "pos_fid is ", tmp['pos_fid'] print "\n\n" num += 1 self.indx_data = indx_data return indx_data
def processMobi8(mh, metadata, sect, files, rscnames, pagemapproc, k8resc, obfuscate_data, apnxfile=None, epubver='2'):
    """Unpack the KF8 (Mobi8) part of a book into an epub-like structure.

    Builds the parts and flows from the raw markup, writes them out together
    with an optional page-map.xml, the opf, and the toc.ncx and/or navigation
    document, then asks ``files`` to assemble the epub.

    Args:
        mh: header object; supplies the raw markup through getRawML().
        metadata: metadata mapping; 'StartOffset' is consulted when the
            guide text is empty.
        sect: handed to K8Processor.
        files: output layout object (k8dir, k8oebps, getInputFileBasename,
            makeEPUB are used here).
        rscnames: handed to the xhtml, cover and opf processors.
        pagemapproc: an existing page map processor, or None.
        k8resc: spine information used for the cover page decision, or None.
        obfuscate_data: passed to makeEPUB; its truthiness goes to writeOPF.
        apnxfile: optional path of an apnx file; only read when pagemapproc
            is None.
        epubver: handed to OPFProcessor.
    """
    def write_binary(path, data):
        # close the handle deterministically instead of leaving it to the
        # garbage collector
        with open(pathof(path), 'wb') as out:
            out.write(data)

    # extract raw markup language
    rawML = mh.getRawML()
    if DUMP or WRITE_RAW_DATA:
        outraw = os.path.join(files.k8dir, files.getInputFileBasename() + '.rawml')
        write_binary(outraw, rawML)

    # KF8 require other indexes which contain parsing information and the FDST info
    # to process the rawml back into the xhtml files, css files, svg image files, etc
    k8proc = K8Processor(mh, sect, files, DUMP)
    k8proc.buildParts(rawML)

    # collect information for the guide first
    guidetext = k8proc.getGuideText()

    # if the guide was empty, add in any guide info from metadata, such as StartOffset
    if not guidetext and 'StartOffset' in metadata.keys():
        # Apparently, KG 2.5 carries over the StartOffset from the mobi7 part...
        # Taking that into account, we only care about the *last* StartOffset, which
        # should always be the correct one in these cases (the one actually pointing
        # to the right place in the mobi8 part).
        starts = metadata['StartOffset']
        last_start = int(starts[-1])
        if last_start == 0xffffffff:
            last_start = 0
        seq, idtext = k8proc.getFragTblInfo(last_start)
        filename, idtext = k8proc.getIDTagByPosFid(toBase32(seq), '0000000000')
        linktgt = filename
        if idtext != '':
            linktgt += '#' + idtext
        guidetext += '<reference type="text" href="Text/%s" />\n' % linktgt

    # if apnxfile is passed in use it for page map information
    if apnxfile is not None and pagemapproc is None:
        with open(apnxfile, 'rb') as apnx:
            apnxdata = "00000000" + apnx.read()
        pagemapproc = PageMapProcessor(mh, apnxdata)

    # generate the page map
    pagemapxml = ''
    if pagemapproc is not None:
        pagemapxml = pagemapproc.generateKF8PageMapXML(k8proc)
        outpm = os.path.join(files.k8oebps, 'page-map.xml')
        write_binary(outpm, pagemapxml)
        if DUMP:
            print(pagemapproc.getNames())
            print(pagemapproc.getOffsets())
            print("\n\nPage Map")
            print(pagemapxml)

    # process the toc ncx
    # ncx map keys: name, pos, len, noffs, text, hlvl, kind, pos_fid, parent, child1, childn, num
    print("Processing ncx / toc")
    ncx = ncxExtract(mh, files)
    ncx_data = ncx.parseNCX()

    # extend the ncx data with filenames and proper internal idtags
    # (the entries are updated in place, so no write-back into the list is needed)
    for ncxmap in ncx_data:
        # NOTE(review): an entry whose 'pos_fid' is None would raise
        # AttributeError here -- confirm every entry carries one
        _, _, _, fid, _, off = ncxmap['pos_fid'].split(':')
        filename, idtag = k8proc.getIDTagByPosFid(fid, off)
        ncxmap['filename'] = filename
        ncxmap['idtag'] = idtag

    # convert the rawML to a set of xhtml files
    print("Building an epub-like structure")
    htmlproc = XHTMLK8Processor(rscnames, k8proc)
    usedmap = htmlproc.buildXHTML()

    # write out the xhtml svg, and css files
    # fileinfo = [skelid|coverpage, dir, name]
    fileinfo = []

    # first create a cover page if none exists
    if CREATE_COVER_PAGE:
        cover = CoverProcessor(files, metadata, rscnames)
        cover_img = cover.getImageName()
        need_to_create_cover_page = False
        if cover_img is not None:
            if k8resc is None or not k8resc.hasSpine():
                # no spine information: look for the cover image in the first part
                part = k8proc.getPart(0)
                if part.find(cover_img) == -1:
                    need_to_create_cover_page = True
            else:
                if "coverpage" not in k8resc.spine_idrefs.keys():
                    part = k8proc.getPart(int(k8resc.spine_order[0]))
                    if part.find(cover_img) == -1:
                        k8resc.prepend_to_spine("coverpage", "inserted", "no", None)
                if k8resc.spine_order[0] == "coverpage":
                    need_to_create_cover_page = True
            if need_to_create_cover_page:
                filename = cover.getXHTMLName()
                fileinfo.append(["coverpage", 'Text', filename])
                guidetext += cover.guide_toxml()
                cover.writeXHTML()

    for i in range(k8proc.getNumberOfParts()):
        part = k8proc.getPart(i)
        skelnum, subdir, filename, _, _, _ = k8proc.getPartInfo(i)
        fileinfo.append([str(skelnum), subdir, filename])
        write_binary(os.path.join(files.k8oebps, subdir, filename), part)

    # flow 0 is skipped; only flows whose format is 'file' are written out
    for i in range(1, k8proc.getNumberOfFlows()):
        _, flow_format, subdir, filename = k8proc.getFlowInfo(i)
        flowpart = k8proc.getFlow(i)
        if flow_format == 'file':
            fileinfo.append([None, subdir, filename])
            write_binary(os.path.join(files.k8oebps, subdir, filename), flowpart)

    # create the opf
    opf = OPFProcessor(files, metadata.copy(), fileinfo, rscnames, True, mh, usedmap,
                       pagemapxml, guidetext, k8resc, epubver)
    opf_uuid = opf.writeOPF(bool(obfuscate_data))

    if opf.hasNCX():
        # Create a toc.ncx.
        ncx.writeK8NCX(ncx_data, metadata)

    if opf.hasNAV():
        # Create a navigation document.
        nav = NAVProcessor(files)
        nav.writeNAV(ncx_data, guidetext, metadata)

    # make an epub-like structure of it all
    print("Creating an epub-like file")
    files.makeEPUB(usedmap, obfuscate_data, opf_uuid)
def processMobi8(mh, metadata, sect, files, imgnames, pagemapproc, k8resc, obfuscate_data, apnxfile=None, epubver='2'):
    """Unpack the KF8 (Mobi8) part of a book into an epub-like structure.

    Builds the parts and flows from the raw markup, writes them out together
    with an optional page-map.xml, the opf, and the toc.ncx and/or navigation
    document, then asks ``files`` to assemble the epub.

    Args:
        mh: header object; supplies the raw markup through getRawML().
        metadata: metadata mapping; 'StartOffset' is consulted when the
            guide text is empty.
        sect: handed to K8Processor.
        files: output layout object (k8dir, k8oebps, getInputFileBasename,
            makeEPUB are used here).
        imgnames: handed to the xhtml, cover and opf processors.
        pagemapproc: an existing page map processor, or None.
        k8resc: spine information used for the cover page decision, or None.
        obfuscate_data: passed to makeEPUB; its truthiness goes to writeOPF.
        apnxfile: optional path of an apnx file; only read when pagemapproc
            is None.
        epubver: handed to OPFProcessor.
    """
    def write_binary(path, data):
        # close the handle deterministically instead of leaving it to the
        # garbage collector
        with open(pathof(path), 'wb') as out:
            out.write(data)

    # extract raw markup language
    rawML = mh.getRawML()
    if DUMP or WRITE_RAW_DATA:
        outraw = os.path.join(files.k8dir, files.getInputFileBasename() + '.rawml')
        write_binary(outraw, rawML)

    # KF8 require other indexes which contain parsing information and the FDST info
    # to process the rawml back into the xhtml files, css files, svg image files, etc
    k8proc = K8Processor(mh, sect, files, DUMP)
    k8proc.buildParts(rawML)

    # collect information for the guide first
    guidetext = k8proc.getGuideText()

    # if the guide was empty, add in any guide info from metadata, such as StartOffset
    if not guidetext and 'StartOffset' in metadata.keys():
        # Apparently, KG 2.5 carries over the StartOffset from the mobi7 part...
        # Taking that into account, we only care about the *last* StartOffset, which
        # should always be the correct one in these cases (the one actually pointing
        # to the right place in the mobi8 part).
        starts = metadata['StartOffset']
        last_start = int(starts[-1])
        if last_start == 0xffffffff:
            last_start = 0
        seq, idtext = k8proc.getFragTblInfo(last_start)
        filename, idtext = k8proc.getIDTagByPosFid(toBase32(seq), '0000000000')
        linktgt = filename
        if idtext != '':
            linktgt += '#' + idtext
        guidetext += '<reference type="text" href="Text/%s" />\n' % linktgt

    # if apnxfile is passed in use it for page map information
    if apnxfile is not None and pagemapproc is None:
        with open(apnxfile, 'rb') as apnx:
            apnxdata = "00000000" + apnx.read()
        pagemapproc = PageMapProcessor(mh, apnxdata)

    # generate the page map
    pagemapxml = ''
    if pagemapproc is not None:
        pagemapxml = pagemapproc.generateKF8PageMapXML(k8proc)
        outpm = os.path.join(files.k8oebps, 'page-map.xml')
        write_binary(outpm, pagemapxml)
        if DUMP:
            print(pagemapproc.getNames())
            print(pagemapproc.getOffsets())
            print("\n\nPage Map")
            print(pagemapxml)

    # process the toc ncx
    # ncx map keys: name, pos, len, noffs, text, hlvl, kind, pos_fid, parent, child1, childn, num
    print("Processing ncx / toc")
    ncx = ncxExtract(mh, files)
    ncx_data = ncx.parseNCX()

    # extend the ncx data with filenames and proper internal idtags
    # (the entries are updated in place, so no write-back into the list is needed)
    for ncxmap in ncx_data:
        # NOTE(review): an entry whose 'pos_fid' is None would raise
        # AttributeError here -- confirm every entry carries one
        _, _, _, fid, _, off = ncxmap['pos_fid'].split(':')
        filename, idtag = k8proc.getIDTagByPosFid(fid, off)
        ncxmap['filename'] = filename
        ncxmap['idtag'] = idtag

    # convert the rawML to a set of xhtml files
    print("Building an epub-like structure")
    htmlproc = XHTMLK8Processor(imgnames, k8proc)
    usedmap = htmlproc.buildXHTML()

    # write out the xhtml svg, and css files
    # fileinfo = [skelid|coverpage, dir, name]
    fileinfo = []

    # first create a cover page if none exists
    if CREATE_COVER_PAGE:
        cover = CoverProcessor(files, metadata, imgnames)
        cover_img = cover.getImageName()
        need_to_create_cover_page = False
        if cover_img is not None:
            if k8resc is None or not k8resc.hasSpine():
                # no spine information: look for the cover image in the first part
                part = k8proc.getPart(0)
                if part.find(cover_img) == -1:
                    need_to_create_cover_page = True
            else:
                if "coverpage" not in k8resc.spine_idrefs.keys():
                    part = k8proc.getPart(int(k8resc.spine_order[0]))
                    if part.find(cover_img) == -1:
                        k8resc.prepend_to_spine("coverpage", "inserted", "no", None)
                if k8resc.spine_order[0] == "coverpage":
                    need_to_create_cover_page = True
            if need_to_create_cover_page:
                filename = cover.getXHTMLName()
                fileinfo.append(["coverpage", 'Text', filename])
                guidetext += cover.guide_toxml()
                cover.writeXHTML()

    for i in range(k8proc.getNumberOfParts()):
        part = k8proc.getPart(i)
        skelnum, subdir, filename, _, _, _ = k8proc.getPartInfo(i)
        fileinfo.append([str(skelnum), subdir, filename])
        write_binary(os.path.join(files.k8oebps, subdir, filename), part)

    # flow 0 is skipped; only flows whose format is 'file' are written out
    for i in range(1, k8proc.getNumberOfFlows()):
        _, flow_format, subdir, filename = k8proc.getFlowInfo(i)
        flowpart = k8proc.getFlow(i)
        if flow_format == 'file':
            fileinfo.append([None, subdir, filename])
            write_binary(os.path.join(files.k8oebps, subdir, filename), flowpart)

    # create the opf
    opf = OPFProcessor(files, metadata.copy(), fileinfo, imgnames, True, mh, usedmap,
                       pagemapxml, guidetext, k8resc, epubver)
    opf_uuid = opf.writeOPF(bool(obfuscate_data))

    if opf.hasNCX():
        # Create a toc.ncx.
        ncx.writeK8NCX(ncx_data, metadata)

    if opf.hasNAV():
        # Create a navigation document.
        nav = NAVProcessor(files)
        nav.writeNAV(ncx_data, guidetext, metadata)

    # make an epub-like structure of it all
    print("Creating an epub-like file")
    files.makeEPUB(usedmap, obfuscate_data, opf_uuid)