Example #1
    def parseHeader(self, data):
        "read INDX header"
        if data[:4] != b'INDX':
            print("Warning: index section is not INDX")
            return False
        words = ('len', 'nul1', 'type', 'gen', 'start', 'count', 'code', 'lng',
                 'total', 'ordt', 'ligt', 'nligt', 'nctoc')
        num = len(words)
        values = struct.unpack(bstr('>%dL' % num), data[4:4 * (num + 1)])
        header = {}
        for n in range(num):
            header[words[n]] = values[n]

        ordt1 = None
        ordt2 = None

        otype, oentries, op1, op2, otagx = struct.unpack_from(
            b'>LLLLL', data, 0xa4)
        header['otype'] = otype
        header['oentries'] = oentries

        if DEBUG_DICT:
            print("otype %d, oentries %d, op1 %d, op2 %d, otagx %d" %
                  (otype, oentries, op1, op2, otagx))

        if header['code'] == 0xfdea or oentries > 0:
            # some dictionaries seem to be codepage 65002 (0xFDEA) which seems
            # to be some sort of strange EBCDIC utf-8 or 16 encoded strings
            # So we need to look for them and store them away to process leading text
            # ORDT1 has 1 byte long entries, ORDT2 has 2 byte long entries
            # we only ever seem to use the second but ...
            #
            # if otype = 0, ORDT table uses 16 bit values as offsets into the table
            # if otype = 1, ORDT table uses 8 bit values as offsets into the table

            assert (data[op1:op1 + 4] == b'ORDT')
            assert (data[op2:op2 + 4] == b'ORDT')
            ordt1 = struct.unpack_from(bstr('>%dB' % oentries), data, op1 + 4)
            ordt2 = struct.unpack_from(bstr('>%dH' % oentries), data, op2 + 4)

        if DEBUG_DICT:
            print("parsed INDX header:")
            for key in header:
                print(
                    key,
                    "%x" % header[key],
                )
            print("\n")
        return header, ordt1, ordt2
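
The comment block above describes how raw index keys reference the ORDT tables. The following is a minimal, hypothetical decoder built only from that description; decode_ordt_key and the assumed key layout are illustrative, not part of this library's API (it relies only on the standard struct module, assumed imported as in the examples).

    def decode_ordt_key(raw, ordt2, otype):
        # per the comment: otype == 0 -> the key is a run of 16-bit offsets into
        # the ORDT table, otype == 1 -> a run of 8-bit offsets
        if otype == 0:
            offsets = struct.unpack('>%dH' % (len(raw) // 2), raw)
        else:
            offsets = struct.unpack('>%dB' % len(raw), raw)
        # map each offset through ORDT2 and treat the result as a Unicode codepoint
        return ''.join(chr(ordt2[off]) for off in offsets)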
Example #2
    def parseINDXHeader(self, data):
        "read INDX header"
        if data[:4] != b'INDX':
            print("Warning: index section is not INDX")
            return False
        words = ('len', 'nul1', 'type', 'gen', 'start', 'count', 'code', 'lng',
                 'total', 'ordt', 'ligt', 'nligt', 'nctoc')
        num = len(words)
        values = struct.unpack(bstr('>%dL' % num), data[4:4 * (num + 1)])
        header = {}
        for n in range(num):
            header[words[n]] = values[n]

        ordt1 = None
        ordt2 = None

        ocnt, oentries, op1, op2, otagx = struct.unpack_from(
            b'>LLLLL', data, 0xa4)
        if header['code'] == 0xfdea or ocnt != 0 or oentries > 0:
            # horribly hacked up ESP (sample) mobi books use two ORDT sections but never specify
            # them in the proper place in the header.  They seem to be codepage 65002 which seems
            # to be some sort of strange EBCDIC utf-8 or 16 encoded strings

            # so we need to look for them and store them away to process leading text
            # ORDT1 has 1 byte long entries, ORDT2 has 2 byte long entries
            # we only ever seem to use the second but ...
            assert (ocnt == 1)
            assert (data[op1:op1 + 4] == b'ORDT')
            assert (data[op2:op2 + 4] == b'ORDT')
            ordt1 = struct.unpack_from(bstr('>%dB' % oentries), data, op1 + 4)
            ordt2 = struct.unpack_from(bstr('>%dH' % oentries), data, op2 + 4)

        if self.DEBUG:
            print("parsed INDX header:")
            for n in words:
                print(
                    n,
                    "%X" % header[n],
                )
            print("")
        return header, ordt1, ordt2
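
A sketch of how this header is typically consumed when walking an index, assuming a Palm DB section reader exposing loadSection and an idx variable holding the section number of the INDX header record (these names are illustrative, not taken from the code above):

    data = sect.loadSection(idx)                       # the INDX header record
    header, ordt1, ordt2 = self.parseINDXHeader(data)
    # 'count' index records follow the header record; each holds packed entries
    for j in range(header['count']):
        idxdata = sect.loadSection(idx + 1 + j)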
Example #3
    def loadCdic(self, cdic):
        if cdic[0:8] != b'CDIC\x00\x00\x00\x10':
            raise unpackException('invalid cdic header')
        phrases, bits = struct.unpack_from(b'>LL', cdic, 8)
        # each CDIC record holds at most 2**bits entries; the final record may hold fewer
        n = min(1 << bits, phrases - len(self.dictionary))
        h = struct.Struct(b'>H').unpack_from

        def getslice(off):
            # each entry is a 16-bit length word followed by the phrase bytes:
            # the low 15 bits give the length, the high bit flags entries that
            # need no further decompression
            blen, = h(cdic, 16 + off)
            slice = cdic[18 + off:18 + off + (blen & 0x7fff)]
            return (slice, blen & 0x8000)

        self.dictionary += lmap(getslice,
                                struct.unpack_from(bstr('>%dH' % n), cdic, 16))
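
loadCdic is normally called once for every CDIC record after the matching HUFF record has been loaded. A usage sketch, where HuffcdicReader, huff_off and huff_num (the first HUFF/CDIC record and the total record count from the MOBI header) are assumptions for illustration:

    reader = HuffcdicReader()
    reader.loadHuff(sect.loadSection(huff_off))          # HUFF record: decode tables
    for i in range(1, huff_num):                         # remaining records are CDIC
        reader.loadCdic(sect.loadSection(huff_off + i))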
Example #4
    def __init__(self, filename):
        self.data = b''
        with open(pathof(filename), 'rb') as f:
            self.data = f.read()
        self.palmheader = self.data[:78]
        self.palmname = self.data[:32]
        self.ident = self.palmheader[0x3C:0x3C + 8]
        self.num_sections, = struct.unpack_from(b'>H', self.palmheader, 76)
        self.filelength = len(self.data)
        sectionsdata = struct.unpack_from(
            bstr('>%dL' % (self.num_sections * 2)), self.data,
            78) + (self.filelength, 0)
        self.sectionoffsets = sectionsdata[::2]
        self.sectionattributes = sectionsdata[1::2]
        self.sectiondescriptions = ["" for x in range(self.num_sections + 1)]
        self.sectiondescriptions[-1] = "File Length Only"
        return
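
With the offsets parsed above, any section can be returned as a plain slice of the file data. A minimal loadSection sketch consistent with those fields (the real class may add bookkeeping such as section descriptions):

    def loadSection(self, section):
        # sectionoffsets carries one extra entry (the file length), so this
        # slice also works for the last real section
        before, after = self.sectionoffsets[section:section + 2]
        return self.data[before:after]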
Example #5
    def __init__(self, filename):
        self.data = open(filename, 'rb').read()
        if self.data[:3] == b'TPZ':
            self.ident = 'TPZ'
        else:
            self.palmheader = self.data[:78]
            self.ident = self.palmheader[0x3C:0x3C+8]
        try:
            self.num_sections, = struct.unpack_from(b'>H', self.palmheader, 76)
        except:
            return
        self.filelength = len(self.data)
        try:
            sectionsdata = struct.unpack_from(bstr('>%dL' % (self.num_sections*2)), self.data, 78) + (self.filelength, 0)
            self.sectionoffsets = sectionsdata[::2]
        except:
            pass
Example #6
    def __init__(self, filename):
        self.data = open(filename, 'rb').read()
        if self.data[:3] == b'TPZ':
            self.ident = 'TPZ'
        else:
            self.palmheader = self.data[:78]
            self.ident = self.palmheader[0x3C:0x3C+8]
        try:
            self.num_sections, = struct.unpack_from(b'>H', self.palmheader, 76)
        except Exception:
            return
        self.filelength = len(self.data)
        try:
            sectionsdata = struct.unpack_from(bstr('>%dL' % (self.num_sections*2)), self.data, 78) + (self.filelength, 0)
            self.sectionoffsets = sectionsdata[::2]
        except Exception:
            pass
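
The ident field parsed above is what callers use to tell Topaz books apart from ordinary Palm/MOBI databases. An illustrative dispatch, where PalmFile is a hypothetical name for the class these constructors belong to:

    pp = PalmFile('book.mobi')
    if pp.ident == 'TPZ':
        pass   # Topaz book, handled by a separate unpacker
    elif pp.ident == b'BOOKMOBI':
        pass   # standard Palm database containing a MOBI/KF8 book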
Example #7
    def __init__(self, mh, sect, files, debug=False):
        self.sect = sect
        self.files = files
        self.mi = MobiIndex(sect)
        self.mh = mh
        self.skelidx = mh.skelidx
        self.fragidx = mh.fragidx
        self.guideidx = mh.guideidx
        self.fdst = mh.fdst
        self.flowmap = {}
        self.flows = None
        self.flowinfo = []
        self.parts = None
        self.partinfo = []
        self.linked_aids = set()
        self.fdsttbl = [0, 0xffffffff]
        self.DEBUG = debug

        # read in and parse the FDST info, which is very similar in format to the Palm DB
        # section table except that its offsets point into the rawML file, not the Palm DB file.
        # This is needed to split out the final css, svg, etc. flow sections
        # that can exist at the end of the rawML file.
        if self.fdst != 0xffffffff:
            header = self.sect.loadSection(self.fdst)
            if header[0:4] == b"FDST":
                num_sections, = struct.unpack_from(b'>L', header, 0x08)
                self.fdsttbl = struct.unpack_from(
                    bstr('>%dL' %
                         (num_sections * 2)), header, 12)[::2] + (mh.rawSize, )
                sect.setsectiondescription(self.fdst, "KF8 FDST INDX")
                if self.DEBUG:
                    print("\nFDST Section Map:  %d sections" % num_sections)
                    for j in range(num_sections):
                        print("Section %d: 0x%08X - 0x%08X" %
                              (j, self.fdsttbl[j], self.fdsttbl[j + 1]))
            else:
                print("\nError: K8 Mobi with Missing FDST info")

        # read/process skeleton index info to create the skeleton table
        skeltbl = []
        if self.skelidx != 0xffffffff:
            # for i in range(2):
            #     fname = 'skel%04d.dat' % i
            #     data = self.sect.loadSection(self.skelidx + i)
            #     with open(pathof(fname), 'wb') as f:
            #         f.write(data)
            outtbl, ctoc_text = self.mi.getIndexData(self.skelidx,
                                                     "KF8 Skeleton")
            fileptr = 0
            for [text, tagMap] in outtbl:
                # file number, skeleton name, fragtbl record count, start position, length
                skeltbl.append(
                    [fileptr, text, tagMap[1][0], tagMap[6][0], tagMap[6][1]])
                fileptr += 1
        self.skeltbl = skeltbl
        if self.DEBUG:
            print("\nSkel Table:  %d entries" % len(self.skeltbl))
            print(
                "table: filenum, skeleton name, frag tbl record count, start position, length"
            )
            for j in range(len(self.skeltbl)):
                print(self.skeltbl[j])

        # read/process the fragment index to create the fragment table
        fragtbl = []
        if self.fragidx != 0xffffffff:
            # for i in range(3):
            #     fname = 'frag%04d.dat' % i
            #     data = self.sect.loadSection(self.fragidx + i)
            #     with open(pathof(fname), 'wb') as f:
            #         f.write(data)
            outtbl, ctoc_text = self.mi.getIndexData(self.fragidx,
                                                     "KF8 Fragment")
            for [text, tagMap] in outtbl:
                # insert position, ctoc offset (aidtext), file number, sequence number, start position, length
                ctocoffset = tagMap[2][0]
                ctocdata = ctoc_text[ctocoffset]
                fragtbl.append([
                    int(text), ctocdata, tagMap[3][0], tagMap[4][0],
                    tagMap[6][0], tagMap[6][1]
                ])
        self.fragtbl = fragtbl
        if self.DEBUG:
            print("\nFragment Table: %d entries" % len(self.fragtbl))
            print(
                "table: file position, link id text, file num, sequence number, start position, length"
            )
            for j in range(len(self.fragtbl)):
                print(self.fragtbl[j])

        # read / process guide index for guide elements of opf
        guidetbl = []
        if self.guideidx != 0xffffffff:
            # for i in range(3):
            #     fname = 'guide%04d.dat' % i
            #     data = self.sect.loadSection(self.guideidx + i)
            #     with open(pathof(fname), 'wb') as f:
            #         f.write(data)
            outtbl, ctoc_text = self.mi.getIndexData(self.guideidx,
                                                     "KF8 Guide elements)")
            for [text, tagMap] in outtbl:
                # ref_type, ref_title, frag number
                ctocoffset = tagMap[1][0]
                ref_title = ctoc_text[ctocoffset]
                ref_type = text
                fileno = None
                if 3 in tagMap:
                    fileno = tagMap[3][0]
                if 6 in tagMap:
                    fileno = tagMap[6][0]
                guidetbl.append([ref_type, ref_title, fileno])
        self.guidetbl = guidetbl
        if self.DEBUG:
            print("\nGuide Table: %d entries" % len(self.guidetbl))
            print("table: ref_type, ref_title, fragtbl entry number")
            for j in range(len(self.guidetbl)):
                print(self.guidetbl[j])
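
Downstream, the skeleton and fragment tables are walked together to rebuild the individual part files from the raw KF8 text. A simplified sketch of that loop, assuming rawML holds the decompressed book text (the library's real method also extracts aid attributes and deals with flows):

    fragptr = 0
    for filenum, skelname, fragcnt, skelpos, skellen in self.skeltbl:
        baseptr = skelpos + skellen
        skeleton = rawML[skelpos:baseptr]
        for _ in range(fragcnt):
            insertpos, idtext, filenum2, seqnum, startpos, length = self.fragtbl[fragptr]
            fragment = rawML[baseptr:baseptr + length]
            # insert positions are given relative to the start of the part at skelpos,
            # so rebase them onto the growing skeleton before splicing
            pos = insertpos - skelpos
            skeleton = skeleton[:pos] + fragment + skeleton[pos:]
            baseptr += length
            fragptr += 1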