示例#1
0
文件: reader.py 项目: pkuhzx/calibre
 def read_secondary_header(self):
     """Parse the secondary header blocks that follow the piece table.

     Reads ``self.sec_hdr_len`` bytes located right after the primary
     header and the piece descriptors, then walks the blocks found there:

     * ``CAOL`` (version 2) sets ``creator_id``, ``entry_chunklen``,
       ``count_chunklen``, ``entry_unknown`` and ``count_unknown``.
     * ``ITSF`` (version 4) sets ``content_offset``, ``timestamp`` and
       ``language_id``.

     Scanning stops at the first block whose tag is not recognised.

     Raises:
         LitError: on an unsupported block version, a 64bit content
             offset, or when no content offset could be determined.
     """
     start = self.hdr_len + (self.num_pieces * self.PIECE_SIZE)
     data = self.read_raw(start, self.sec_hdr_len)
     # The 32-bit value at byte 4 gives the position of the first block.
     offset = int32(data[4:])
     while offset < len(data):
         blocktype = data[offset:offset + 4]
         blockver = u32(data[offset + 4:])
         if blocktype == 'CAOL':
             if blockver != 2:
                 raise LitError('Unknown CAOL block format %d' % blockver)
             self.creator_id = u32(data[offset + 12:])
             self.entry_chunklen = u32(data[offset + 20:])
             self.count_chunklen = u32(data[offset + 24:])
             self.entry_unknown = u32(data[offset + 28:])
             self.count_unknown = u32(data[offset + 32:])
             offset += 48
         elif blocktype == 'ITSF':
             if blockver != 4:
                 raise LitError('Unknown ITSF block format %d' % blockver)
             # A non-zero high dword means the offset does not fit 32 bits.
             if u32(data[offset + 4 + 16:]):
                 raise LitError('This file has a 64bit content offset')
             self.content_offset = u32(data[offset + 16:])
             self.timestamp = u32(data[offset + 24:])
             self.language_id = u32(data[offset + 28:])
             offset += 48
         else:
             # Unknown tag: its size is not known, so the offset cannot be
             # advanced. Previously this case looped forever; stop scanning
             # and let the check below decide whether enough was parsed.
             break
     if not hasattr(self, 'content_offset'):
         raise LitError('Could not figure out the content offset')
示例#2
0
文件: reader.py 项目: pkuhzx/calibre
 def read_manifest(self):
     """Parse the ``/manifest`` entry into ``self.manifest`` and ``self.paths``.

     The manifest is a sequence of records, each made of a length-prefixed
     root string followed by four groups of files (``spine``,
     ``not spine``, ``css``, ``images``). Every file becomes a
     ``ManifestItem`` keyed by its internal name in ``self.manifest``.

     After parsing, any leading path shared by all items is stripped,
     remaining absolute paths are reduced to their basename, and
     ``self.paths`` maps each final path to its item (plus
     ``self.opf_path`` mapped to ``None``).

     Raises:
         LitError: if the manifest entry is missing or truncated.
     """
     if '/manifest' not in self.entries:
         raise LitError('Lit file does not have a valid manifest')
     raw = self.get_file('/manifest')
     self.manifest = {}
     self.paths = {self.opf_path: None}
     while raw:
         # Slice instead of index: a one-element slice is accepted by
         # ord() for both Python 2 str and Python 3 bytes.
         slen, raw = ord(raw[0:1]), raw[1:]
         if slen == 0:
             break
         root, raw = raw[:slen].decode('utf8'), raw[slen:]
         if not raw:
             raise LitError('Truncated manifest')
         for state in ('spine', 'not spine', 'css', 'images'):
             num_files, raw = int32(raw), raw[4:]
             if num_files == 0:
                 continue
             for _ in range(num_files):
                 if len(raw) < 5:
                     raise LitError('Truncated manifest')
                 offset, raw = u32(raw), raw[4:]
                 internal, raw = consume_sized_utf8_string(raw)
                 original, raw = consume_sized_utf8_string(raw)
                 # The path should be stored unquoted, but not always
                 original = urlunquote(original)
                 # Is this last one UTF-8 or ASCIIZ?
                 mime_type, raw = consume_sized_utf8_string(raw, zpad=True)
                 self.manifest[internal] = ManifestItem(
                     original, internal, mime_type, offset, root, state)
     # Materialise the values: they are indexed and sliced below, which a
     # Python 3 dict view does not support.
     mlist = list(self.manifest.values())
     # Remove any common path elements
     if len(mlist) > 1:
         shared = mlist[0].path
         for item in mlist[1:]:
             path = item.path
             while shared and not path.startswith(shared):
                 try:
                     # Drop the last path component, keeping the slash.
                     shared = shared[:shared.rindex("/", 0, -2) + 1]
                 except ValueError:
                     shared = None
             if not shared:
                 break
         if shared:
             slen = len(shared)
             for item in mlist:
                 item.path = item.path[slen:]
     # Fix any straggling absolute paths
     for item in mlist:
         # startswith() tolerates an empty path, which indexing [0] did not.
         if item.path.startswith('/'):
             item.path = os.path.basename(item.path)
         self.paths[item.path] = item
示例#3
0
文件: reader.py 项目: pkuhzx/calibre
 def read_section_names(self):
     """Load the section name table from ``::DataSpace/NameList``.

     Fills ``self.section_names`` with the decoded names and resets
     ``self.section_data`` to a list of ``None`` of the same length.

     Raises:
         LitError: if the entry is absent, shorter than four bytes, or a
             name runs past the end of the data.
     """
     namelist = '::DataSpace/NameList'
     if namelist not in self.entries:
         raise LitError('Lit file does not have a valid NameList')
     raw = self.get_file(namelist)
     total = len(raw)
     if total < 4:
         raise LitError('Invalid Namelist section')
     # The section count is the 16-bit value stored at bytes 2..3.
     count = u16(raw[2:4])
     names = self.section_names = [""] * count
     self.section_data = [None] * count
     cursor = 4
     for index in range(count):
         start = cursor + 2
         # Stored length counts 2-byte units; one extra unit is read as
         # well and trailing NULs are stripped after decoding.
         end = start + u16(raw[cursor:start]) * 2 + 2
         if end > total:
             raise LitError('Invalid Namelist section')
         names[index] = raw[start:end].decode('utf-16-le').rstrip('\000')
         cursor = end
示例#4
0
文件: reader.py 项目: pkuhzx/calibre
 def __init__(self, filename_or_stream, log):
     """Open a LIT file and parse its headers, directory and manifest.

     Args:
         filename_or_stream: either an object with a ``read`` attribute,
             used as-is, or a path that is opened in binary mode.
         log: object whose ``warn`` callable receives warnings.

     ``self.opf_path`` is derived from the stream's ``name`` when it has
     one, otherwise it defaults to ``content.opf``.

     Raises:
         LitError: if the magic or version is wrong, or parsing fails.
     """
     self._warn = log.warn
     # Only a stream opened here is ours to close on failure.
     owns_stream = not hasattr(filename_or_stream, 'read')
     if owns_stream:
         self.stream = open(filename_or_stream, 'rb')
     else:
         self.stream = filename_or_stream
     try:
         try:
             self.opf_path = os.path.splitext(os.path.basename(
                 self.stream.name))[0] + '.opf'
         except AttributeError:
             self.opf_path = 'content.opf'
         if self.magic != 'ITOLITLS':
             raise LitError('Not a valid LIT file')
         if self.version != 1:
             raise LitError('Unknown LIT version %d' % (self.version, ))
         self.read_secondary_header()
         self.read_header_pieces()
         self.read_section_names()
         self.read_manifest()
         self.read_drm()
     except Exception:
         # The half-built object is never returned to the caller, so a
         # file opened above would otherwise stay open until collected.
         if owns_stream:
             self.stream.close()
         raise
示例#5
0
文件: reader.py 项目: pkuhzx/calibre
 def read_directory(self, piece):
     """Build ``self.entries`` from the main directory header piece.

     ``piece`` must start with ``IFCM`` and consist of a 32-byte header
     followed by ``num_chunks`` chunks of ``chunk_size`` bytes. Chunks
     tagged ``AOLL`` hold the entries; each entry is a UTF-8 name plus
     encoded section, offset and size values, stored as a
     ``DirectoryEntry`` under its name. Other chunks are skipped.

     Raises:
         LitError: on a bad tag or length, an out-of-range remaining
             count, or an entry name that overruns its chunk.
     """
     if not piece.startswith('IFCM'):
         raise LitError('Header piece #1 is not main directory.')
     chunk_size = int32(piece[8:12])
     num_chunks = int32(piece[24:28])
     if len(piece) != 32 + (num_chunks * chunk_size):
         raise LitError('IFCM header has incorrect length')
     self.entries = {}
     for index in range(num_chunks):
         start = 32 + (index * chunk_size)
         chunk = piece[start:start + chunk_size]
         if chunk[:4] != 'AOLL':
             continue
         unused = int32(chunk[4:8])
         body = chunk[8:]
         if unused >= chunk_size:
             raise LitError('AOLL remaining count is negative')
         # Bytes of entry data in this chunk, after the 48-byte overhead.
         remaining = chunk_size - (unused + 48)
         # Hopefully will work even without a correct entries count
         entry_count = u16(body[-2:]) or ((2**16) - 1)
         body = body[40:]
         for _ in range(entry_count):
             if remaining <= 0:
                 break
             namelen, body, remaining = encint(body, remaining)
             if namelen != (namelen & 0x7fffffff):
                 raise LitError('Directory entry had 64bit name length.')
             if namelen > remaining - 3:
                 raise LitError('Read past end of directory chunk')
             try:
                 name = body[:namelen].decode('utf-8')
             except UnicodeDecodeError:
                 # An undecodable name ends processing of this chunk.
                 break
             body = body[namelen:]
             remaining -= namelen
             section, body, remaining = encint(body, remaining)
             offset, body, remaining = encint(body, remaining)
             size, body, remaining = encint(body, remaining)
             self.entries[name] = DirectoryEntry(name, section, offset, size)
示例#6
0
文件: reader.py 项目: pkuhzx/calibre
def read_utf8_char(bytes, pos):
    """Decode one UTF-8 encoded character from ``bytes`` at ``pos``.

    Args:
        bytes: the byte string being decoded.
        pos: index of the lead byte.

    Returns:
        A ``(character, next_pos)`` tuple, where ``next_pos`` is the index
        just past the decoded sequence.

    Raises:
        LitError: if the lead byte is a continuation byte or has seven or
            more leading one bits, if the sequence runs past the end of
            the data, or if a continuation byte is malformed.
    """
    # One-byte slices instead of indexing: indexing Python 3 bytes yields
    # an int, which ord() rejects. A slice works for both Python 2 str and
    # Python 3 bytes.
    lead = bytes[pos:pos + 1]
    c = ord(lead)
    if not (c & 0x80):
        # Plain single-byte character.
        return codepoint_to_chr(c), pos + 1
    # Count the leading one bits of the lead byte to get the sequence size.
    mask = 0x80
    elsize = 0
    while c & mask:
        mask >>= 1
        elsize += 1
    if (mask <= 1) or (mask == 0x40):
        raise LitError('Invalid UTF8 character: %s' % repr(lead))
    if elsize + pos > len(bytes):
        raise LitError('Invalid UTF8 character: %s' % repr(lead))
    # Keep only the payload bits of the lead byte.
    c &= (mask - 1)
    for i in range(1, elsize):
        b = ord(bytes[pos + i:pos + i + 1])
        if (b & 0xC0) != 0x80:
            raise LitError('Invalid UTF8 character: %s' %
                           repr(bytes[pos:pos + i]))
        c = (c << 6) | (b & 0x3F)
    return codepoint_to_chr(c), pos + elsize
示例#7
0
    def decompress(self, content, control, reset_table):
        """Decompress LZX-compressed ``content`` and return the result.

        Args:
            content: the compressed section data.
            control: ControlData for the section; must carry the ``LZXC``
                tag and a window size.
            reset_table: reset table giving the uncompressed length, the
                reset interval and the compressed offsets of each frame.

        The data is decompressed one window at a time, resetting the
        decoder before each window. A chunk that fails to decompress is
        skipped after a warning rather than aborting the whole section.

        Raises:
            LitError: if the control data or reset table is malformed,
                holds 64bit values, or the reset table ends before all
                of the section has been processed.
        """
        if len(control) < 32 or control[CONTROL_TAG:CONTROL_TAG +
                                        4] != b"LZXC":
            raise LitError("Invalid ControlData tag value")
        if len(reset_table) < (RESET_INTERVAL + 8):
            raise LitError("Reset table is too short")
        if u32(reset_table[RESET_UCLENGTH + 4:]) != 0:
            raise LitError("Reset table has 64bit value for UCLENGTH")

        result = []

        # Window size is 14 plus the bit length of the control value.
        window_size = 14
        u = u32(control[CONTROL_WINDOW_SIZE:])
        while u > 0:
            u >>= 1
            window_size += 1
        if window_size < 15 or window_size > 21:
            raise LitError("Invalid window in ControlData")
        lzx.init(window_size)

        ofs_entry = int32(reset_table[RESET_HDRLEN:]) + 8
        uclength = int32(reset_table[RESET_UCLENGTH:])
        # The interval does not change, so read it once.
        interval = int32(reset_table[RESET_INTERVAL:])
        accum = interval
        bytes_remaining = uclength
        window_bytes = (1 << window_size)
        base = 0

        while ofs_entry < len(reset_table):
            if accum >= window_bytes:
                accum = 0
                size = int32(reset_table[ofs_entry:])
                u = int32(reset_table[ofs_entry + 4:])
                if u != 0:
                    raise LitError("Reset table entry greater than 32 bits")
                if size >= len(content):
                    self._warn("LZX reset table entry out of bounds")
                if bytes_remaining >= window_bytes:
                    lzx.reset()
                    try:
                        result.append(
                            lzx.decompress(content[base:size], window_bytes))
                    except lzx.LZXError:
                        # Use the same logger attribute as the warning above.
                        self._warn("LZX decompression error; skipping chunk")
                    bytes_remaining -= window_bytes
                    base = size
            accum += interval
            ofs_entry += 8
        # Final partial window, if any.
        if bytes_remaining < window_bytes and bytes_remaining > 0:
            lzx.reset()
            try:
                result.append(lzx.decompress(content[base:], bytes_remaining))
            except lzx.LZXError:
                self._warn("LZX decompression error; skipping chunk")
            bytes_remaining = 0
        if bytes_remaining > 0:
            raise LitError("Failed to completely decompress section")
        return b''.join(result)
示例#8
0
文件: reader.py 项目: pkuhzx/calibre
 def get_section_uncached(self, section):
     """Return the fully transformed content of section number ``section``.

     The section's ``Transform/List`` is consumed 16 bytes (one GUID) at a
     time; each GUID selects decryption or LZX decompression of the
     content, and the matching record is dropped from the front of
     ``ControlData`` afterwards.

     Raises:
         LitError: if the control data is too short or a transform GUID
             is not recognised.
     """
     name = self.section_names[section]
     storage = '::DataSpace/Storage/' + name
     transform = self.get_file(storage + '/Transform/List')
     content = self.get_file(storage + '/Content')
     control = self.get_file(storage + '/ControlData')
     while len(transform) >= 16:
         csize = (int32(control) + 1) * 4
         if csize <= 0 or csize > len(control):
             raise LitError("ControlData is too short")
         guid = msguid(transform)
         if guid == DESENCRYPT_GUID:
             content = self.decrypt(content)
         elif guid == LZXCOMPRESS_GUID:
             table_path = '/'.join(
                 ('::DataSpace/Storage', name, 'Transform',
                  LZXCOMPRESS_GUID, 'InstanceData/ResetTable'))
             reset_table = self.get_file(table_path)
             content = self.decompress(content, control, reset_table)
         else:
             raise LitError("Unrecognized transform: %s." % repr(guid))
         # Advance past the control record and GUID just handled.
         control = control[csize:]
         transform = transform[16:]
     return content
示例#9
0
文件: reader.py 项目: pkuhzx/calibre
 def read_header_pieces(self):
     """Read every header piece and dispatch on its index.

     Each descriptor in ``self.header`` (after ``self.hdr_len``) gives the
     offset and size of a piece, which is then read from the file. Piece 1
     is the directory and is passed to ``read_directory``; pieces 1 and 2
     are checked against values from the secondary header; pieces 3 and 4
     are stored as ``piece3_guid`` and ``piece4_guid``. Piece 0 is read
     but not used.

     Raises:
         LitError: if a descriptor has a 64bit value or a piece does not
             match the secondary header.
     """
     table = self.header[self.hdr_len:]
     for index in range(self.num_pieces):
         begin = index * self.PIECE_SIZE
         descriptor = table[begin:(index + 1) * self.PIECE_SIZE]
         # The high dwords of offset and size must both be zero.
         if u32(descriptor[4:]) != 0 or u32(descriptor[12:]) != 0:
             raise LitError('Piece %s has 64bit value' % repr(descriptor))
         offset = u32(descriptor)
         size = int32(descriptor[8:])
         piece = self.read_raw(offset, size)
         if index == 1:
             consistent = (u32(piece[8:]) == self.entry_chunklen and
                           u32(piece[12:]) == self.entry_unknown)
             if not consistent:
                 raise LitError('Secondary header does not match piece')
             self.read_directory(piece)
         elif index == 2:
             # Only validated; no data needed from this piece
             consistent = (u32(piece[8:]) == self.count_chunklen and
                           u32(piece[12:]) == self.count_unknown)
             if not consistent:
                 raise LitError('Secondary header does not match piece')
         elif index == 3:
             self.piece3_guid = piece
         elif index == 4:
             self.piece4_guid = piece
示例#10
0
文件: reader.py 项目: pkuhzx/calibre
 def read_drm(self):
     """Detect the DRM level and, where possible, recover the book key.

     ``self.drmlevel`` is set to 5, 3 or 1 according to which DRM storage
     entry is present (checked in that order), or left at 0 when none is.
     For levels below 5 the sealed key is decrypted and stored in
     ``self.bookkey``.

     Raises:
         DRMError: if the book is at DRM level 5.
         LitError: if the decrypted key does not start with a NUL.
     """
     self.drmlevel = 0
     markers = (
         ('/DRMStorage/Licenses/EUL', 5),
         ('/DRMStorage/DRMBookplate', 3),
         ('/DRMStorage/DRMSealed', 1),
     )
     for entry_name, level in markers:
         if entry_name in self.entries:
             self.drmlevel = level
             break
     else:
         # No DRM storage entry at all: nothing more to do.
         return
     if self.drmlevel >= 5:
         raise DRMError("Cannot access DRM-protected book")
     msdes.deskey(self.calculate_deskey(), msdes.DE1)
     bookkey = msdes.des(self.get_file('/DRMStorage/DRMSealed'))
     if bookkey[0] != '\000':
         raise LitError('Unable to decrypt title key!')
     self.bookkey = bookkey[1:9]
示例#11
0
文件: reader.py 项目: pkuhzx/calibre
    def binary_to_text_inner(self, bin, buf, stack):
        """Run one step of the binary-markup decoder state machine.

        Pops a single frame from ``stack``, then reads characters from
        ``bin`` starting at ``self.cpos`` (advancing it) and writes the
        decoded markup to ``buf``. The frame holds, in order: depth,
        tag_name, current_map, dynamic_tag, errors, in_censorship,
        is_goingdown, state and flags.

        When an element with children is opened, a 'close tag' frame for
        that element and a fresh 'text' frame for its content are pushed
        and the method returns; it also returns when a closing marker is
        read or the input is exhausted. Presumably the caller keeps
        invoking this method while ``stack`` is non-empty -- confirm
        against the caller.

        ``count`` is a plain local shared between the length/value states;
        it is not part of the saved frame.

        NOTE(review): ``buf.write`` receives both ``encode(...)`` results
        and plain string literals; whether those are the same type depends
        on ``encode`` and ``buf``, which are not visible here.

        Raises:
            LitError: on a close without an open tag, an unknown atom tag
                or attribute, an extra closing tag, or an invalid
                character count.
        """
        (depth, tag_name, current_map, dynamic_tag, errors, in_censorship,
         is_goingdown, state, flags) = stack.pop()

        # Resuming the frame saved when this element's children were
        # entered: emit the end tag and continue as plain text.
        if state == 'close tag':
            if not tag_name:
                raise LitError('Tag ends before it begins.')
            buf.write(encode(u''.join(('</', tag_name, '>'))))
            dynamic_tag = 0
            tag_name = None
            state = 'text'

        while self.cpos < len(bin):
            c, self.cpos = read_utf8_char(bin, self.cpos)
            oc = ord(c)

            if state == 'text':
                # A NUL character introduces a tag; everything else is text.
                if oc == 0:
                    state = 'get flags'
                    continue
                elif c == '\v':
                    c = '\n'
                # Angle brackets in text are doubled -- presumably undone
                # by a later pass; confirm.
                elif c == '>':
                    c = '>>'
                elif c == '<':
                    c = '<<'
                buf.write(encode(c))

            elif state == 'get flags':
                # A second NUL cancels the tag and returns to text.
                if oc == 0:
                    state = 'text'
                    continue
                flags = oc
                state = 'get tag'

            elif state == 'get tag':
                state = 'text' if oc == 0 else 'get attr'
                if flags & FLAG_OPENING:
                    tag = oc
                    buf.write('<')
                    # Opening without the closing flag: the element has
                    # content, so its children will be descended into.
                    if not (flags & FLAG_CLOSING):
                        is_goingdown = True
                    # 0x8000 means the tag name is spelled out inline.
                    if tag == 0x8000:
                        state = 'get custom length'
                        continue
                    if flags & FLAG_ATOM:
                        if not self.tag_atoms or tag not in self.tag_atoms:
                            raise LitError("atom tag %d not in atom tag list" %
                                           tag)
                        tag_name = self.tag_atoms[tag]
                        current_map = self.attr_atoms
                    elif tag < len(self.tag_map):
                        tag_name = self.tag_map[tag]
                        current_map = self.tag_to_attr_map[tag]
                    else:
                        dynamic_tag += 1
                        errors += 1
                        tag_name = '?' + codepoint_to_chr(tag) + '?'
                        # NOTE(review): reached only when tag is beyond
                        # tag_map; if tag_to_attr_map is no longer than
                        # tag_map this lookup would raise -- confirm.
                        current_map = self.tag_to_attr_map[tag]
                        print('WARNING: tag %s unknown' %
                              codepoint_to_chr(tag))
                    buf.write(encode(tag_name))
                elif flags & FLAG_CLOSING:
                    if depth == 0:
                        raise LitError('Extra closing tag %s at %d' %
                                       (tag_name, self.cpos))
                    # Return so the saved 'close tag' frame can be resumed.
                    break

            elif state == 'get attr':
                in_censorship = False
                # A NUL ends the attribute list.
                if oc == 0:
                    state = 'text'
                    if not is_goingdown:
                        # Element without content: self-close it.
                        tag_name = None
                        dynamic_tag = 0
                        buf.write(' />')
                    else:
                        buf.write('>')
                        # Save this element so its end tag is written once
                        # the children are done ...
                        frame = (depth, tag_name, current_map, dynamic_tag,
                                 errors, in_censorship, False, 'close tag',
                                 flags)
                        stack.append(frame)
                        # ... and start a clean frame for the children.
                        frame = (depth + 1, None, None, 0, 0, False, False,
                                 'text', 0)
                        stack.append(frame)
                        break
                else:
                    # 0x8000 means the attribute name is spelled out inline.
                    if oc == 0x8000:
                        state = 'get attr length'
                        continue
                    attr = None
                    # Tag-specific map first, then the global attribute map.
                    if current_map and oc in current_map and current_map[oc]:
                        attr = current_map[oc]
                    elif oc in self.attr_map:
                        attr = self.attr_map[oc]
                    if not attr or not isinstance(attr, string_or_bytes):
                        raise LitError('Unknown attribute %d in tag %s' %
                                       (oc, tag_name))
                    # Attributes named with a leading '%' are consumed but
                    # never written to the output.
                    if attr.startswith('%'):
                        in_censorship = True
                        state = 'get value length'
                        continue
                    buf.write(' ' + encode(attr) + '=')
                    if attr in ['href', 'src']:
                        state = 'get href length'
                    else:
                        state = 'get value length'

            elif state == 'get value length':
                if not in_censorship:
                    buf.write('"')
                # The stored length is one more than the character count.
                count = oc - 1
                if count == 0:
                    if not in_censorship:
                        buf.write('"')
                    in_censorship = False
                    state = 'get attr'
                    continue
                state = 'get value'
                # 0xffff leaves count at 0xfffe, which 'get value' treats
                # as "the next character is a number", so skip the check.
                if oc == 0xffff:
                    continue
                if count < 0 or count > (len(bin) - self.cpos):
                    raise LitError('Invalid character count %d' % count)

            elif state == 'get value':
                if count == 0xfffe:
                    # Numeric value: written as the character code minus one.
                    if not in_censorship:
                        buf.write('%s"' % (oc - 1))
                    in_censorship = False
                    state = 'get attr'
                elif count > 0:
                    if not in_censorship:
                        if c == '"':
                            c = '&quot;'
                        elif c == '<':
                            c = '&lt;'
                        buf.write(c.encode('ascii', 'xmlcharrefreplace'))
                    count -= 1
                if count == 0:
                    if not in_censorship:
                        buf.write('"')
                    in_censorship = False
                    state = 'get attr'

            elif state == 'get custom length':
                count = oc - 1
                if count <= 0 or count > len(bin) - self.cpos:
                    raise LitError('Invalid character count %d' % count)
                dynamic_tag += 1
                state = 'get custom'
                tag_name = ''

            elif state == 'get custom':
                # Accumulate the inline tag name one character at a time.
                tag_name += c
                count -= 1
                if count == 0:
                    buf.write(encode(tag_name))
                    state = 'get attr'

            elif state == 'get attr length':
                count = oc - 1
                if count <= 0 or count > (len(bin) - self.cpos):
                    raise LitError('Invalid character count %d' % count)
                buf.write(' ')
                state = 'get custom attr'

            elif state == 'get custom attr':
                # Inline attribute names are written straight through.
                buf.write(encode(c))
                count -= 1
                if count == 0:
                    buf.write('=')
                    state = 'get value length'

            elif state == 'get href length':
                count = oc - 1
                if count <= 0 or count > (len(bin) - self.cpos):
                    raise LitError('Invalid character count %d' % count)
                href = ''
                state = 'get href'

            elif state == 'get href':
                href += c
                count -= 1
                if count == 0:
                    # The first character of the stored href is dropped
                    # before the fragment is split off and the document
                    # part is mapped through item_path().
                    doc, frag = urldefrag(href[1:])
                    path = self.item_path(doc)
                    if frag:
                        path = '#'.join((path, frag))
                    path = urlnormalize(path)
                    buf.write(encode(u'"%s"' % path))
                    state = 'get attr'