Пример #1
0
 def __init__(self, raw):
     """Parse an FDST (flow division) record from its raw bytes.

     Stores the section table offset, the section count and the
     (start, end) section pairs on the instance.

     :param raw: raw bytes of the FDST record
     :raises ValueError: if the magic number, the header layout or the
         record length is not what is expected
     """
     if raw[:4] != b"FDST":
         raise ValueError("KF8 does not have a valid FDST record")
     # Header after the magic: 4-byte offset to the section table,
     # 4-byte number of sections (big endian unsigned longs)
     self.sec_off, self.num_sections = struct.unpack_from(b">LL", raw, 4)
     if self.sec_off != 12:
         raise ValueError("FDST record has unknown extra fields")
     # Each section is a (start, end) pair of unsigned longs
     secf = b">%dL" % (self.num_sections * 2)
     secs = struct.unpack_from(secf, raw, self.sec_off)
     rest = raw[self.sec_off + struct.calcsize(secf) :]
     if rest:
         raise ValueError("FDST record has trailing data: " "%s" % format_bytes(rest))
     # zip, not the py2-only izip: keeps this parser consistent with the
     # other FDST parser in this file and valid under python 3
     self.sections = tuple(zip(secs[::2], secs[1::2]))
Пример #2
0
    def dump_record(self, r, dat):
        """Render a human readable description of the TBS (trailing byte
        sequence) indexing data of a single text record.

        :param r: the text record (provides ``idx`` and ``trailing_data``)
        :param dat: dict with the record's 'geom'etry and the index
            entries that 'starts', 'ends' or are 'complete' within it
        :return: tuple of (tbs_type, list of description lines)
        """
        ans = []
        ans.append('\nRecord #%d: Starts at: %d Ends at: %d' %
                   (r.idx, dat['geom'][0], dat['geom'][1]))
        s, e, c = dat['starts'], dat['ends'], dat['complete']
        ans.append(('\tContains: %d index entries '
                    '(%d ends, %d complete, %d starts)') %
                   tuple(map(len, (s + e + c, e, c, s))))
        byts = bytearray(r.trailing_data.get('indexing', b''))
        ans.append('TBS bytes: %s' % format_bytes(byts))
        for typ, entries in (('Ends', e), ('Complete', c), ('Starts', s)):
            if entries:
                ans.append('\t%s:' % typ)
                for x in entries:
                    ans.append(('\t\tIndex Entry: %s (Parent index: %s, '
                                'Depth: %d, Offset: %d, Size: %d) [%s]') %
                               (x.index, x.parent_index, x.depth, x.offset,
                                x.size, x.label))

        def bin4(num):
            # Zero padded, four digit binary representation of num
            ans = bin(num)[2:]
            return as_bytes('0' * (4 - len(ans)) + ans)

        def repr_extra(x):
            # FIX: previously this ignored its parameter and read the
            # enclosing scope's ``extra``; they are the same object at the
            # only call site, so behavior is unchanged
            return str({bin4(k): v for k, v in iteritems(x)})

        tbs_type = 0
        is_periodical = self.doc_type in (257, 258, 259)
        if len(byts):
            outermost_index, extra, consumed = decode_tbs(byts, flag_size=3)
            byts = byts[consumed:]
            for k in extra:
                tbs_type |= k
            ans.append('\nTBS: %d (%s)' % (tbs_type, bin4(tbs_type)))
            ans.append('Outermost index: %d' % outermost_index)
            ans.append('Unknown extra start bytes: %s' % repr_extra(extra))
            if is_periodical:  # Hierarchical periodical
                try:
                    byts, a = self.interpret_periodical(
                        tbs_type, byts, dat['geom'][0])
                except Exception:
                    # Narrowed from a bare except: decoding failures are
                    # reported (best effort), not fatal
                    import traceback
                    traceback.print_exc()
                    a = []
                    print('Failed to decode TBS bytes for record: %d' % r.idx)
                ans += a
            if byts:
                sbyts = tuple(hex(b)[2:] for b in byts)
                ans.append('Remaining bytes: %s' % ' '.join(sbyts))

        ans.append('')
        return tbs_type, ans
Пример #3
0
 def __init__(self, raw):
     '''Parse the raw bytes of a KF8 FDST record into (start, end) sections.

     Raises ValueError if the record does not have the expected magic
     number, header layout or length.
     '''
     if not raw.startswith(b'FDST'):
         raise ValueError('KF8 does not have a valid FDST record')
     # Header after the magic: section table offset and section count
     header = struct.unpack_from(b'>LL', raw, 4)
     self.sec_off, self.num_sections = header
     if self.sec_off != 12:
         raise ValueError('FDST record has unknown extra fields')
     # The table is a flat run of unsigned longs: start, end, start, end...
     fmt = b'>%dL' % (2 * self.num_sections)
     values = struct.unpack_from(fmt, raw, self.sec_off)
     trailing = raw[self.sec_off + struct.calcsize(fmt):]
     if trailing:
         raise ValueError('FDST record has trailing data: '
                 '%s' % format_bytes(trailing))
     starts, ends = values[::2], values[1::2]
     self.sections = tuple(zip(starts, ends))
Пример #4
0
 def __init__(self, raw):
     '''Parse a KF8 FDST record into its (start, end) flow sections.

     :param raw: raw bytes of the FDST record
     :raises ValueError: if the magic number, header layout or record
         length is not as expected
     '''
     if raw[:4] != b'FDST':
         raise ValueError('KF8 does not have a valid FDST record')
     # 4 byte offset to the section table, 4 byte count of sections
     self.sec_off, self.num_sections = struct.unpack_from(b'>LL', raw, 4)
     if self.sec_off != 12:
         raise ValueError('FDST record has unknown extra fields')
     # Each section contributes a (start, end) pair of unsigned longs
     secf = b'>%dL' % (self.num_sections * 2)
     secs = struct.unpack_from(secf, raw, self.sec_off)
     rest = raw[self.sec_off + struct.calcsize(secf):]
     if rest:
         raise ValueError('FDST record has trailing data: '
                          '%s' % format_bytes(rest))
     # zip, not the py2-only izip, for python 3 compatibility (matches
     # the other FDST parser in this file)
     self.sections = tuple(zip(secs[::2], secs[1::2]))
Пример #5
0
    def dump_record(self, r, dat):
        '''Describe the TBS (trailing byte sequence) indexing bytes of a
        single text record as a list of human readable lines.

        :param r: the text record (provides ``idx`` and ``trailing_data``)
        :param dat: dict with the record 'geom'etry and its 'starts',
            'ends' and 'complete' index entries
        :return: tuple of (tbs_type, list of lines)
        '''
        ans = []
        ans.append('\nRecord #%d: Starts at: %d Ends at: %d'%(r.idx,
            dat['geom'][0], dat['geom'][1]))
        s, e, c = dat['starts'], dat['ends'], dat['complete']
        ans.append(('\tContains: %d index entries '
            '(%d ends, %d complete, %d starts)')%tuple(map(len, (s+e+c, e,
                c, s))))
        byts = bytearray(r.trailing_data.get('indexing', b''))
        ans.append('TBS bytes: %s'%format_bytes(byts))
        for typ, entries in (('Ends', e), ('Complete', c), ('Starts', s)):
            if entries:
                ans.append('\t%s:'%typ)
                for x in entries:
                    ans.append(('\t\tIndex Entry: %s (Parent index: %s, '
                            'Depth: %d, Offset: %d, Size: %d) [%s]')%(
                        x.index, x.parent_index, x.depth, x.offset, x.size, x.label))

        def bin4(num):
            # Zero padded, four digit binary representation
            ans = bin(num)[2:]
            return as_bytes('0'*(4-len(ans)) + ans)

        def repr_extra(x):
            # FIX: previously ignored its argument and read the outer
            # ``extra`` (the same object at the only call site, so no
            # behavior change)
            return str({bin4(k):v for k, v in iteritems(x)})

        tbs_type = 0
        is_periodical = self.doc_type in (257, 258, 259)
        if len(byts):
            outermost_index, extra, consumed = decode_tbs(byts, flag_size=3)
            byts = byts[consumed:]
            for k in extra:
                tbs_type |= k
            ans.append('\nTBS: %d (%s)'%(tbs_type, bin4(tbs_type)))
            ans.append('Outermost index: %d'%outermost_index)
            ans.append('Unknown extra start bytes: %s'%repr_extra(extra))
            if is_periodical:  # Hierarchical periodical
                try:
                    byts, a = self.interpret_periodical(tbs_type, byts,
                        dat['geom'][0])
                except Exception:
                    # Narrowed from a bare except: report and continue
                    import traceback
                    traceback.print_exc()
                    a = []
                    print('Failed to decode TBS bytes for record: %d'%r.idx)
                ans += a
            if byts:
                sbyts = tuple(hex(b)[2:] for b in byts)
                ans.append('Remaining bytes: %s'%' '.join(sbyts))

        ans.append('')
        return tbs_type, ans
Пример #6
0
 def __init__(self, records, codec):
     '''Parse the CNCX (compiled NCX) records into an offset -> string map.

     :param records: iterable of CNCX records, each with a ``raw`` bytes
         attribute; offsets in each successive record are biased by a
         further 0x10000
     :param codec: character encoding of the stored strings
     '''
     self.records = OrderedDict()
     record_offset = 0
     for record in records:
         raw = record.raw
         pos = 0
         while pos < len(raw):
             # Each entry: variable width integer length, then that many
             # bytes of encoded text
             length, consumed = decint(raw[pos:])
             if length > 0:
                 try:
                     self.records[pos+record_offset] = raw[
                         pos+consumed:pos+consumed+length].decode(codec)
                 except Exception:
                     # Undecodable entry: store its raw bytes and skip the
                     # rest of this record. Narrowed from a bare except,
                     # which also swallowed KeyboardInterrupt/SystemExit.
                     byts = raw[pos:]
                     r = format_bytes(byts)
                     print('CNCX entry at offset %d has unknown format %s'%(
                         pos+record_offset, r))
                     self.records[pos+record_offset] = r
                     pos = len(raw)
             pos += consumed+length
         record_offset += 0x10000
Пример #7
0
 def __init__(self, records, codec):
     '''Build the CNCX offset -> decoded string mapping.

     :param records: iterable of CNCX records (each exposing ``raw``
         bytes); offsets gain an extra 0x10000 bias per record
     :param codec: character encoding used to decode the stored strings
     '''
     self.records = OrderedDict()
     record_offset = 0
     for record in records:
         raw = record.raw
         pos = 0
         while pos < len(raw):
             # decint() returns (payload length, bytes consumed by the
             # variable width length field)
             length, consumed = decint(raw[pos:])
             if length > 0:
                 try:
                     self.records[pos+record_offset] = raw[
                         pos+consumed:pos+consumed+length].decode(codec)
                 except Exception:
                     # Best effort on malformed entries: keep the raw bytes
                     # and abandon the rest of this record. Narrowed from a
                     # bare except clause.
                     byts = raw[pos:]
                     r = format_bytes(byts)
                     print('CNCX entry at offset %d has unknown format %s'%(
                         pos+record_offset, r))
                     self.records[pos+record_offset] = r
                     pos = len(raw)
             pos += consumed+length
         record_offset += 0x10000
Пример #8
0
    def __str__(self):
        '''Render every parsed MOBI header field as a human readable,
        newline separated report, followed by the EXTH block (if present)
        and record 0 statistics.
        '''
        ans = ['*' * 20 + ' MOBI %d Header ' % self.file_version + '*' * 20]

        a = ans.append

        def i(d, x):
            # Append a "label: value" line, showing NULL_INDEX as 'NULL'
            x = 'NULL' if x == NULL_INDEX else x
            a('%s: %s' % (d, x))

        def r(d, attr):
            # Append a record-reference attribute, showing both its
            # absolute value and its value relative to the header offset
            # when the attribute is a relative record
            x = getattr(self, attr)
            if attr in self.relative_records and x != NULL_INDEX:
                a('%s: Absolute: %d Relative: %d' %
                  (d, x, x - self.header_offset))
            else:
                i(d, x)

        a('Compression: %s' % self.compression)
        a('Unused: %r' % self.unused)
        a('Text length: %d' % self.text_length)
        a('Number of text records: %d' % self.number_of_text_records)
        a('Text record size: %d' % self.text_record_size)
        a('Encryption: %s' % self.encryption_type)
        a('Unknown: %r' % self.unknown)
        a('Identifier: %r' % self.identifier)
        a('Header length: %d' % self.length)
        a('Type: %s' % self.type)
        a('Encoding: %s' % self.encoding)
        a('UID: %r' % self.uid)
        a('File version: %d' % self.file_version)
        r('Meta Orth Index', 'meta_orth_indx')
        r('Meta Infl Index', 'meta_infl_indx')
        r('Secondary index record', 'secondary_index_record')
        a('Reserved: %r' % self.reserved)
        r('First non-book record', 'first_non_book_record')
        a('Full name offset: %d' % self.fullname_offset)
        a('Full name length: %d bytes' % self.fullname_length)
        a('Langcode: %r' % self.locale_raw)
        a('Language: %s' % self.language)
        a('Sub language: %s' % self.sublanguage)
        a('Input language: %r' % self.input_language)
        # NOTE(review): 'output_langauage' is misspelt but presumably
        # matches the attribute name set by the parser — confirm before
        # renaming
        a('Output language: %r' % self.output_langauage)
        a('Min version: %d' % self.min_version)
        r('First Image index', 'first_image_index')
        r('Huffman record offset', 'huffman_record_offset')
        a('Huffman record count: %d' % self.huffman_record_count)
        r('Huffman table offset', 'datp_record_offset')
        a('Huffman table length: %r' % self.datp_record_count)
        a('EXTH flags: %s (%s)' % (bin(self.exth_flags)[2:], self.has_exth))
        # DRM fields are only meaningful when the header says DRM data exists
        if self.has_drm_data:
            a('Unknown3: %r' % self.unknown3)
            r('DRM Offset', 'drm_offset')
            a('DRM Count: %s' % self.drm_count)
            a('DRM Size: %s' % self.drm_size)
            a('DRM Flags: %r' % self.drm_flags)
        if self.has_extra_data_flags:
            a('Unknown4: %r' % self.unknown4)
            if hasattr(self, 'first_text_record'):
                a('First content record: %d' % self.first_text_record)
                a('Last content record: %d' % self.last_text_record)
            else:
                r('FDST Index', 'fdst_idx')
            a('FDST Count: %d' % self.fdst_count)
            r('FCIS number', 'fcis_number')
            a('FCIS count: %d' % self.fcis_count)
            r('FLIS number', 'flis_number')
            a('FLIS count: %d' % self.flis_count)
            a('Unknown6: %r' % self.unknown6)
            r('SRCS record index', 'srcs_record_index')
            a('Number of SRCS records?: %d' % self.num_srcs_records)
            a('Unknown7: %r' % self.unknown7)
            a(('Extra data flags: %s (has multibyte: %s) '
               '(has indexing: %s) (has uncrossable breaks: %s)') %
              (bin(self.extra_data_flags), self.has_multibytes,
               self.has_indexing_bytes, self.has_uncrossable_breaks))
            r('NCX index', 'primary_index_record')
        # Longer headers (KF8) carry additional index references
        if self.length >= 248:
            r('Sections Index', 'sect_idx')
            r('SKEL Index', 'skel_idx')
            r('DATP Index', 'datp_idx')
            r('Other Index', 'oth_idx')
            if self.unknown9:
                a('Unknown9: %r' % self.unknown9)

        ans = '\n'.join(ans)

        if self.has_exth:
            ans += '\n\n' + str(self.exth)
            ans += '\n\nBytes after EXTH (%d bytes): %s' % (len(
                self.bytes_after_exth), format_bytes(self.bytes_after_exth))

        ans += '\nNumber of bytes after full name: %d' % (
            len(self.raw) - (self.fullname_offset + self.fullname_length))

        ans += '\nRecord 0 length: %d' % len(self.raw)
        return ans
Пример #9
0
    def read_tbs(self):
        '''Decode and verify the TBS (trailing byte sequence) indexing
        bytes of every text record, storing a textual report in
        ``self.indexing_data``.

        The index entries are also re-encoded and a warning is emitted
        when the calculated bytes do not match the bytes in the file.
        '''
        from calibre.ebooks.mobi.writer8.tbs import (Entry, DOC,
                collect_indexing_data, encode_strands_as_sequences,
                sequences_to_bytes, calculate_all_tbs, NegativeStrandIndex)
        entry_map = []
        for index in self.ncx_index:
            # Pad with None so Entry always gets its full 12 fields
            vals = list(index)[:-1] + [None, None, None, None]
            entry_map.append(Entry(*(vals[:12])))

        indexing_data = collect_indexing_data(entry_map, list(map(len,
            self.text_records)))
        self.indexing_data = [DOC + '\n' +textwrap.dedent('''\
                Index Entry lines are of the form:
                depth:index_number [action] parent (index_num-parent) Geometry

                Where Geometry is the start and end of the index entry w.r.t
                the start of the text record.

                ''')]

        tbs_type = 8
        try:
            calculate_all_tbs(indexing_data)
        except NegativeStrandIndex:
            # Fall back to the simpler type 5 TBS encoding
            calculate_all_tbs(indexing_data, tbs_type=5)
            tbs_type = 5

        for i, strands in enumerate(indexing_data):
            rec = self.text_records[i]
            tbs_bytes = rec.trailing_data.get('indexing', b'')
            desc = ['Record #%d'%i]
            for s, strand in enumerate(strands):
                desc.append('Strand %d'%s)
                for entries in itervalues(strand):
                    for e in entries:
                        desc.append(
                        ' %s%d [%-9s] parent: %s (%d) Geometry: (%d, %d)'%(
                            e.depth * ('  ') + '- ', e.index, e.action, e.parent,
                            e.index-(e.parent or 0), e.start-i*RECORD_SIZE,
                            e.start+e.length-i*RECORD_SIZE))
            desc.append('TBS Bytes: ' + format_bytes(tbs_bytes))
            flag_sz = 3
            sequences = []
            otbs = tbs_bytes
            while tbs_bytes:
                try:
                    val, extra, consumed = decode_tbs(tbs_bytes, flag_size=flag_sz)
                except Exception:
                    # Narrowed from a bare except: stop decoding on a
                    # malformed sequence; remaining bytes reported below
                    break
                flag_sz = 4
                tbs_bytes = tbs_bytes[consumed:]
                extra = {bin(k):v for k, v in iteritems(extra)}
                sequences.append((val, extra))
            for j, seq in enumerate(sequences):
                desc.append('Sequence #%d: %r %r'%(j, seq[0], seq[1]))
            if tbs_bytes:
                desc.append('Remaining bytes: %s'%format_bytes(tbs_bytes))
            calculated_sequences = encode_strands_as_sequences(strands,
                    tbs_type=tbs_type)
            try:
                calculated_bytes = sequences_to_bytes(calculated_sequences)
            except Exception:
                # Narrowed from a bare except
                calculated_bytes = b'failed to calculate tbs bytes'
            if calculated_bytes != otbs:
                print('WARNING: TBS mismatch for record %d'%i)
                desc.append('WARNING: TBS mismatch!')
                desc.append('Calculated sequences: %r'%calculated_sequences)
            desc.append('')
            self.indexing_data.append('\n'.join(desc))
Пример #10
0
    def read_tbs(self):
        '''Decode and verify the TBS (trailing byte sequence) indexing
        bytes for every text record, appending a textual report per
        record to ``self.indexing_data``.

        Re-encodes the index entries and warns when the calculated bytes
        differ from those actually stored in the file.
        '''
        from calibre.ebooks.mobi.writer8.tbs import (
            Entry, DOC, collect_indexing_data, encode_strands_as_sequences,
            sequences_to_bytes, calculate_all_tbs, NegativeStrandIndex)
        entry_map = []
        for index in self.ncx_index:
            # Pad with None so Entry always receives its full 12 fields
            vals = list(index)[:-1] + [None, None, None, None]
            entry_map.append(Entry(*(vals[:12])))

        indexing_data = collect_indexing_data(
            entry_map, list(map(len, self.text_records)))
        self.indexing_data = [
            DOC + '\n' + textwrap.dedent('''\
                Index Entry lines are of the form:
                depth:index_number [action] parent (index_num-parent) Geometry

                Where Geometry is the start and end of the index entry w.r.t
                the start of the text record.

                ''')
        ]

        tbs_type = 8
        try:
            calculate_all_tbs(indexing_data)
        except NegativeStrandIndex:
            # Fall back to the simpler type 5 TBS encoding
            calculate_all_tbs(indexing_data, tbs_type=5)
            tbs_type = 5

        for i, strands in enumerate(indexing_data):
            rec = self.text_records[i]
            tbs_bytes = rec.trailing_data.get('indexing', b'')
            desc = ['Record #%d' % i]
            for s, strand in enumerate(strands):
                desc.append('Strand %d' % s)
                # FIX: dict.itervalues() is py2-only (removed in py3);
                # values() iterates the same entries
                for entries in strand.values():
                    for e in entries:
                        desc.append(
                            ' %s%d [%-9s] parent: %s (%d) Geometry: (%d, %d)' %
                            (e.depth * ('  ') + '- ', e.index, e.action,
                             e.parent, e.index -
                             (e.parent or 0), e.start - i * RECORD_SIZE,
                             e.start + e.length - i * RECORD_SIZE))
            desc.append('TBS Bytes: ' + format_bytes(tbs_bytes))
            flag_sz = 3
            sequences = []
            otbs = tbs_bytes
            while tbs_bytes:
                try:
                    val, extra, consumed = decode_tbs(tbs_bytes,
                                                      flag_size=flag_sz)
                except Exception:
                    # Narrowed from a bare except: stop decoding on a
                    # malformed sequence; remaining bytes reported below
                    break
                flag_sz = 4
                tbs_bytes = tbs_bytes[consumed:]
                # FIX: dict.iteritems() is py2-only; items() is equivalent
                extra = {bin(k): v for k, v in extra.items()}
                sequences.append((val, extra))
            for j, seq in enumerate(sequences):
                desc.append('Sequence #%d: %r %r' % (j, seq[0], seq[1]))
            if tbs_bytes:
                desc.append('Remaining bytes: %s' % format_bytes(tbs_bytes))
            calculated_sequences = encode_strands_as_sequences(
                strands, tbs_type=tbs_type)
            try:
                calculated_bytes = sequences_to_bytes(calculated_sequences)
            except Exception:
                # Narrowed from a bare except
                calculated_bytes = b'failed to calculate tbs bytes'
            if calculated_bytes != otbs:
                print('WARNING: TBS mismatch for record %d' % i)
                desc.append('WARNING: TBS mismatch!')
                desc.append('Calculated sequences: %r' % calculated_sequences)
            desc.append('')
            self.indexing_data.append('\n'.join(desc))
Пример #11
0
    def __str__(self):
        '''Render every parsed MOBI header field as a human readable,
        newline separated report, followed by the EXTH block (if present)
        and record 0 statistics.
        '''
        ans = ['*'*20 + ' MOBI %d Header '%self.file_version+ '*'*20]

        a = ans.append

        def i(d, x):
            # Append a "label: value" line, showing NULL_INDEX as 'NULL'
            x = 'NULL' if x == NULL_INDEX else x
            a('%s: %s'%(d, x))

        def r(d, attr):
            # Append a record-reference attribute, showing both its
            # absolute value and its value relative to the header offset
            # when the attribute is a relative record
            x = getattr(self, attr)
            if attr in self.relative_records and x != NULL_INDEX:
                a('%s: Absolute: %d Relative: %d'%(d, x, x-self.header_offset))
            else:
                i(d, x)

        a('Compression: %s'%self.compression)
        a('Unused: %r'%self.unused)
        a('Text length: %d'%self.text_length)
        a('Number of text records: %d'%self.number_of_text_records)
        a('Text record size: %d'%self.text_record_size)
        a('Encryption: %s'%self.encryption_type)
        a('Unknown: %r'%self.unknown)
        a('Identifier: %r'%self.identifier)
        a('Header length: %d'% self.length)
        a('Type: %s'%self.type)
        a('Encoding: %s'%self.encoding)
        a('UID: %r'%self.uid)
        a('File version: %d'%self.file_version)
        r('Meta Orth Index', 'meta_orth_indx')
        r('Meta Infl Index', 'meta_infl_indx')
        r('Secondary index record', 'secondary_index_record')
        a('Reserved: %r'%self.reserved)
        r('First non-book record', 'first_non_book_record')
        a('Full name offset: %d'%self.fullname_offset)
        a('Full name length: %d bytes'%self.fullname_length)
        a('Langcode: %r'%self.locale_raw)
        a('Language: %s'%self.language)
        a('Sub language: %s'%self.sublanguage)
        a('Input language: %r'%self.input_language)
        # NOTE(review): 'output_langauage' is misspelt but presumably
        # matches the attribute name set by the parser — confirm before
        # renaming
        a('Output language: %r'%self.output_langauage)
        a('Min version: %d'%self.min_version)
        r('First Image index', 'first_image_index')
        r('Huffman record offset', 'huffman_record_offset')
        a('Huffman record count: %d'%self.huffman_record_count)
        r('Huffman table offset', 'datp_record_offset')
        a('Huffman table length: %r'%self.datp_record_count)
        a('EXTH flags: %s (%s)'%(bin(self.exth_flags)[2:], self.has_exth))
        # DRM fields are only meaningful when the header says DRM data exists
        if self.has_drm_data:
            a('Unknown3: %r'%self.unknown3)
            r('DRM Offset', 'drm_offset')
            a('DRM Count: %s'%self.drm_count)
            a('DRM Size: %s'%self.drm_size)
            a('DRM Flags: %r'%self.drm_flags)
        if self.has_extra_data_flags:
            a('Unknown4: %r'%self.unknown4)
            if hasattr(self, 'first_text_record'):
                a('First content record: %d'%self.first_text_record)
                a('Last content record: %d'%self.last_text_record)
            else:
                r('FDST Index', 'fdst_idx')
            a('FDST Count: %d'% self.fdst_count)
            r('FCIS number', 'fcis_number')
            a('FCIS count: %d'% self.fcis_count)
            r('FLIS number', 'flis_number')
            a('FLIS count: %d'% self.flis_count)
            a('Unknown6: %r'% self.unknown6)
            r('SRCS record index', 'srcs_record_index')
            a('Number of SRCS records?: %d'%self.num_srcs_records)
            a('Unknown7: %r'%self.unknown7)
            a(('Extra data flags: %s (has multibyte: %s) '
                '(has indexing: %s) (has uncrossable breaks: %s)')%(
                    bin(self.extra_data_flags), self.has_multibytes,
                    self.has_indexing_bytes, self.has_uncrossable_breaks))
            r('NCX index', 'primary_index_record')
        # Longer headers (KF8) carry additional index references
        if self.length >= 248:
            r('Sections Index', 'sect_idx')
            r('SKEL Index', 'skel_idx')
            r('DATP Index', 'datp_idx')
            r('Other Index', 'oth_idx')
            if self.unknown9:
                a('Unknown9: %r'%self.unknown9)

        ans = '\n'.join(ans)

        if self.has_exth:
            ans += '\n\n' + str(self.exth)
            ans += '\n\nBytes after EXTH (%d bytes): %s'%(
                    len(self.bytes_after_exth),
                    format_bytes(self.bytes_after_exth))

        ans += '\nNumber of bytes after full name: %d' % (len(self.raw) - (self.fullname_offset +
                self.fullname_length))

        ans += '\nRecord 0 length: %d'%len(self.raw)
        return ans