def parseHeader(self, data):
    """Parse an INDX section header used by dictionary lookup.

    Returns a tuple (header, ordt1, ordt2) where header maps field
    names to their 32-bit values and ordt1/ordt2 are the decoded ORDT
    tables (or None when absent).  Returns False if the section does
    not begin with the b'INDX' magic.
    """
    if data[:4] != b'INDX':
        print("Warning: index section is not INDX")
        return False

    # Fixed run of big-endian 32-bit fields immediately after the magic.
    field_names = ('len', 'nul1', 'type', 'gen', 'start', 'count', 'code',
                   'lng', 'total', 'ordt', 'ligt', 'nligt', 'nctoc')
    nfields = len(field_names)
    field_values = struct.unpack(bstr('>%dL' % nfields),
                                 data[4:4 * (nfields + 1)])
    header = dict(zip(field_names, field_values))

    ordt1 = None
    ordt2 = None
    otype, oentries, op1, op2, otagx = struct.unpack_from(b'>LLLLL', data, 0xa4)
    header['otype'] = otype
    header['oentries'] = oentries
    if DEBUG_DICT:
        print("otype %d, oentries %d, op1 %d, op2 %d, otagx %d" %
              (otype, oentries, op1, op2, otagx))

    if header['code'] == 0xfdea or oentries > 0:
        # Some dictionaries use codepage 65002 (0xFDEA), which appears to be
        # a strange EBCDIC utf-8/16 string encoding.  Such sections carry two
        # ORDT tables that must be stored for later processing of leading
        # text: ORDT1 has 1-byte entries, ORDT2 has 2-byte entries (only the
        # second ever seems to be used, but keep both).
        # If otype == 0 the ORDT table holds 16-bit offsets into the table;
        # if otype == 1 it holds 8-bit offsets into the table.
        assert data[op1:op1 + 4] == b'ORDT'
        assert data[op2:op2 + 4] == b'ORDT'
        ordt1 = struct.unpack_from(bstr('>%dB' % oentries), data, op1 + 4)
        ordt2 = struct.unpack_from(bstr('>%dH' % oentries), data, op2 + 4)

    if DEBUG_DICT:
        print("parsed INDX header:")
        for key in header:
            print(key, "%x" % header[key], )
        print("\n")
    return header, ordt1, ordt2
def parseINDXHeader(self, data):
    """Parse an INDX section header.

    Returns (header, ordt1, ordt2): header maps field names to their
    32-bit values; ordt1/ordt2 are the decoded ORDT tables, or None when
    the section carries none.  Returns False if the b'INDX' magic is
    missing.
    """
    if data[:4] != b'INDX':
        print("Warning: index section is not INDX")
        return False

    # Fixed run of big-endian 32-bit fields immediately after the magic.
    field_names = ('len', 'nul1', 'type', 'gen', 'start', 'count', 'code',
                   'lng', 'total', 'ordt', 'ligt', 'nligt', 'nctoc')
    nfields = len(field_names)
    field_values = struct.unpack(bstr('>%dL' % nfields),
                                 data[4:4 * (nfields + 1)])
    header = dict(zip(field_names, field_values))

    ordt1 = None
    ordt2 = None
    ocnt, oentries, op1, op2, otagx = struct.unpack_from(b'>LLLLL', data, 0xa4)
    if header['code'] == 0xfdea or ocnt != 0 or oentries > 0:
        # Horribly hacked-up ESP (sample) mobi books use two ORDT sections
        # but never specify them in the proper place in the header.  They
        # seem to be codepage 65002 — some sort of strange EBCDIC utf-8/16
        # string encoding — so look for the tables and store them away to
        # process leading text.  ORDT1 has 1-byte entries, ORDT2 has 2-byte
        # entries; only the second ever seems to be used, but keep both.
        assert ocnt == 1
        assert data[op1:op1 + 4] == b'ORDT'
        assert data[op2:op2 + 4] == b'ORDT'
        ordt1 = struct.unpack_from(bstr('>%dB' % oentries), data, op1 + 4)
        ordt2 = struct.unpack_from(bstr('>%dH' % oentries), data, op2 + 4)

    if self.DEBUG:
        print("parsed INDX header:")
        for name in field_names:
            print(name, "%X" % header[name], )
        print("")
    return header, ordt1, ordt2
def loadCdic(self, cdic):
    """Append the phrase entries of one CDIC record to self.dictionary.

    Raises unpackException if the record does not carry the expected
    CDIC magic and header-length word.
    """
    if cdic[0:8] != b'CDIC\x00\x00\x00\x10':
        raise unpackException('invalid cdic header')
    phrases, bits = struct.unpack_from(b'>LL', cdic, 8)
    # A record holds at most 1 << bits entries; the final record holds
    # only whatever remains of the total phrase count.
    count = min(1 << bits, phrases - len(self.dictionary))
    read_u16 = struct.Struct(b'>H').unpack_from

    def entry(off):
        # Each entry is a 16-bit length word (top bit flags a complete
        # phrase) followed by that many bytes of text.
        blen, = read_u16(cdic, 16 + off)
        text = cdic[18 + off:18 + off + (blen & 0x7fff)]
        return (text, blen & 0x8000)

    offsets = struct.unpack_from(bstr('>%dH' % count), cdic, 16)
    self.dictionary += lmap(entry, offsets)
def __init__(self, filename):
    """Load a Palm database file and index its section table.

    Reads the whole file into memory, extracts the 78-byte Palm header,
    and unpacks the section (offset, attribute) records, appending a
    (filelength, 0) sentinel so sectionoffsets[i + 1] always bounds
    section i.
    """
    # Removed the dead `self.data = b''` pre-assignment: it was always
    # overwritten, and if open() raises, __init__ fails anyway.
    with open(pathof(filename), 'rb') as f:
        self.data = f.read()
    self.palmheader = self.data[:78]
    self.palmname = self.data[:32]
    # Creator/type identifier (e.g. b'BOOKMOBI') at offset 0x3C.
    self.ident = self.palmheader[0x3C:0x3C + 8]
    self.num_sections, = struct.unpack_from(b'>H', self.palmheader, 76)
    self.filelength = len(self.data)
    # Each section record is a pair of 32-bit words: offset, attributes.
    sectionsdata = struct.unpack_from(
        bstr('>%dL' % (self.num_sections * 2)), self.data,
        78) + (self.filelength, 0)
    self.sectionoffsets = sectionsdata[::2]
    self.sectionattributes = sectionsdata[1::2]
    # One description slot per section plus the sentinel entry.
    self.sectiondescriptions = ["" for x in range(self.num_sections + 1)]
    self.sectiondescriptions[-1] = "File Length Only"
def __init__(self, filename):
    """Best-effort parse of a Palm DB (or TPZ) file header.

    Sets self.ident to 'TPZ' for Topaz files; otherwise unpacks the Palm
    header, the section count, and the section offsets.  Parsing
    failures are deliberately swallowed so callers can still inspect
    whatever attributes were successfully set.
    """
    # `with` guarantees the file handle is closed (the original
    # open(...).read() leaked it until garbage collection).
    with open(filename, 'rb') as f:
        self.data = f.read()
    if self.data[:3] == b'TPZ':
        self.ident = 'TPZ'
    else:
        self.palmheader = self.data[:78]
        self.ident = self.palmheader[0x3C:0x3C + 8]
        try:
            self.num_sections, = struct.unpack_from(b'>H', self.palmheader, 76)
        except Exception:
            # Was a bare `except:` — narrowed so KeyboardInterrupt and
            # SystemExit are no longer swallowed.  Header too short to
            # hold a section count; leave the object partially set.
            return
        self.filelength = len(self.data)
        try:
            # Append a (filelength, 0) sentinel so offsets[i + 1] always
            # bounds section i.
            sectionsdata = struct.unpack_from(
                bstr('>%dL' % (self.num_sections * 2)), self.data,
                78) + (self.filelength, 0)
            self.sectionoffsets = sectionsdata[::2]
        except Exception:
            # Truncated section table; keep best-effort behavior.
            pass
def __init__(self, filename):
    """Best-effort parse of a Palm DB (or TPZ) file header.

    Sets self.ident to 'TPZ' for Topaz files; otherwise unpacks the Palm
    header, the section count, and the section offsets.  Parsing
    failures are deliberately swallowed so callers can still inspect
    whatever attributes were successfully set.
    """
    # `with` guarantees the file handle is closed (the original
    # open(...).read() leaked it until garbage collection).
    with open(filename, 'rb') as f:
        self.data = f.read()
    if self.data[:3] == b'TPZ':
        self.ident = 'TPZ'
    else:
        self.palmheader = self.data[:78]
        self.ident = self.palmheader[0x3C:0x3C + 8]
        try:
            self.num_sections, = struct.unpack_from(b'>H', self.palmheader, 76)
        except Exception:
            # Header too short to hold a section count; leave the object
            # partially set.
            return
        self.filelength = len(self.data)
        try:
            # Append a (filelength, 0) sentinel so offsets[i + 1] always
            # bounds section i.
            sectionsdata = struct.unpack_from(
                bstr('>%dL' % (self.num_sections * 2)), self.data,
                78) + (self.filelength, 0)
            self.sectionoffsets = sectionsdata[::2]
        except Exception:
            # Truncated section table; keep best-effort behavior.
            pass
def __init__(self, mh, sect, files, debug=False):
    """Parse the KF8 structural indexes from a mobi header.

    Builds four tables from the book's index sections:
      - self.fdsttbl: flow-section boundary offsets into the rawML
      - self.skeltbl: skeleton records (one per output file part)
      - self.fragtbl: fragment records to be inserted into skeletons
      - self.guidetbl: guide (landmark) entries for the OPF

    mh is the parsed mobi header (provides the index section numbers and
    rawSize); sect is the section loader; files is kept for later use;
    debug enables verbose table dumps.
    """
    self.sect = sect
    self.files = files
    self.mi = MobiIndex(sect)
    self.mh = mh
    # Section numbers of the KF8 indexes; 0xffffffff means "absent".
    self.skelidx = mh.skelidx
    self.fragidx = mh.fragidx
    self.guideidx = mh.guideidx
    self.fdst = mh.fdst
    self.flowmap = {}
    self.flows = None
    self.flowinfo = []
    self.parts = None
    self.partinfo = []
    self.linked_aids = set()
    # Default flow table covering the whole rawML when no FDST exists.
    self.fdsttbl = [0, 0xffffffff]
    self.DEBUG = debug
    # read in and parse the FDST info which is very similar in format to the Palm DB section
    # parsing except it provides offsets into rawML file and not the Palm DB file
    # this is needed to split up the final css, svg, etc flow section
    # that can exist at the end of the rawML file
    if self.fdst != 0xffffffff:
        header = self.sect.loadSection(self.fdst)
        if header[0:4] == b"FDST":
            num_sections, = struct.unpack_from(b'>L', header, 0x08)
            # Keep only the start offsets ([::2]) and append rawSize so
            # fdsttbl[i + 1] always bounds flow section i.
            self.fdsttbl = struct.unpack_from(
                bstr('>%dL' % (num_sections * 2)), header,
                12)[::2] + (mh.rawSize, )
            sect.setsectiondescription(self.fdst, "KF8 FDST INDX")
            if self.DEBUG:
                print("\nFDST Section Map: %d sections" % num_sections)
                for j in range(num_sections):
                    print("Section %d: 0x%08X - 0x%08X" %
                          (j, self.fdsttbl[j], self.fdsttbl[j + 1]))
        else:
            print("\nError: K8 Mobi with Missing FDST info")

    # read/process skeleton index info to create the skeleton table
    skeltbl = []
    if self.skelidx != 0xffffffff:
        # for i in range(2):
        # fname = 'skel%04d.dat' % i
        # data = self.sect.loadSection(self.skelidx + i)
        # with open(pathof(fname), 'wb') as f:
        # f.write(data)
        outtbl, ctoc_text = self.mi.getIndexData(self.skelidx, "KF8 Skeleton")
        fileptr = 0
        for [text, tagMap] in outtbl:
            # file number, skeleton name, fragtbl record count, start position, length
            skeltbl.append(
                [fileptr, text, tagMap[1][0], tagMap[6][0], tagMap[6][1]])
            fileptr += 1
    self.skeltbl = skeltbl
    if self.DEBUG:
        print("\nSkel Table: %d entries" % len(self.skeltbl))
        print(
            "table: filenum, skeleton name, frag tbl record count, start position, length"
        )
        for j in range(len(self.skeltbl)):
            print(self.skeltbl[j])

    # read/process the fragment index to create the fragment table
    fragtbl = []
    if self.fragidx != 0xffffffff:
        # for i in range(3):
        # fname = 'frag%04d.dat' % i
        # data = self.sect.loadSection(self.fragidx + i)
        # with open(pathof(fname), 'wb') as f:
        # f.write(data)
        outtbl, ctoc_text = self.mi.getIndexData(self.fragidx, "KF8 Fragment")
        for [text, tagMap] in outtbl:
            # insert position, ctoc offset (aidtext), file number, sequence number, start position, length
            ctocoffset = tagMap[2][0]
            ctocdata = ctoc_text[ctocoffset]
            fragtbl.append([
                int(text), ctocdata, tagMap[3][0], tagMap[4][0], tagMap[6][0],
                tagMap[6][1]
            ])
    self.fragtbl = fragtbl
    if self.DEBUG:
        print("\nFragment Table: %d entries" % len(self.fragtbl))
        print(
            "table: file position, link id text, file num, sequence number, start position, length"
        )
        for j in range(len(self.fragtbl)):
            print(self.fragtbl[j])

    # read / process guide index for guide elements of opf
    guidetbl = []
    if self.guideidx != 0xffffffff:
        # for i in range(3):
        # fname = 'guide%04d.dat' % i
        # data = self.sect.loadSection(self.guideidx + i)
        # with open(pathof(fname), 'wb') as f:
        # f.write(data)
        # NOTE(review): stray ')' in the description string below — left
        # as-is since it is a runtime string.
        outtbl, ctoc_text = self.mi.getIndexData(self.guideidx,
                                                 "KF8 Guide elements)")
        for [text, tagMap] in outtbl:
            # ref_type, ref_title, frag number
            ctocoffset = tagMap[1][0]
            ref_title = ctoc_text[ctocoffset]
            ref_type = text
            fileno = None
            # Tag 6 takes precedence over tag 3 when both are present.
            if 3 in tagMap:
                fileno = tagMap[3][0]
            if 6 in tagMap:
                fileno = tagMap[6][0]
            guidetbl.append([ref_type, ref_title, fileno])
    self.guidetbl = guidetbl
    if self.DEBUG:
        print("\nGuide Table: %d entries" % len(self.guidetbl))
        print("table: ref_type, ref_title, fragtbl entry number")
        for j in range(len(self.guidetbl)):
            print(self.guidetbl[j])