def __init__(self, record):
    """Parse and validate the header of an INDX record.

    Reads ``record.raw``, checks the ``INDX`` magic, decodes the
    fixed-offset big-endian header fields into attributes, then checks
    the TAGX section that begins at ``header_length``, the entry count
    stored after it, and the ``IDXT`` block found at ``idxt_start``.

    Args:
        record: object exposing the record bytes as ``record.raw``.

    Raises:
        ValueError: if the ``INDX``, ``TAGX`` or ``IDXT`` marker is
            missing, the index encoding number is not recognised, or
            one of the internal consistency checks fails.
    """
    self.record = record
    raw = self.record.raw

    def read_u32(buf, start):
        # Big-endian unsigned 32-bit value held in buf[start:start + 4].
        return struct.unpack('>I', buf[start:start + 4])[0]

    def read_u16(buf, start):
        # Big-endian unsigned 16-bit value held in buf[start:start + 2].
        return struct.unpack('>H', buf[start:start + 2])[0]

    if raw[:4] != b'INDX':
        raise ValueError('Invalid Primary Index Record')

    # Fixed-layout portion of the header.
    self.header_length = read_u32(raw, 4)
    self.unknown1 = raw[8:12]
    self.header_type = read_u32(raw, 12)
    self.index_type = read_u32(raw, 16)
    index_type_names = {0: 'normal', 2: 'inflection', 6: 'calibre'}
    self.index_type_desc = index_type_names.get(self.index_type, 'unknown')
    self.idxt_start = read_u32(raw, 20)
    self.index_count = read_u32(raw, 24)
    self.index_encoding_num = read_u32(raw, 28)
    known_encodings = {65001: 'utf-8', 1252: 'cp1252'}
    self.index_encoding = known_encodings.get(
        self.index_encoding_num, 'unknown')
    if self.index_encoding == 'unknown':
        raise ValueError(
            'Unknown index encoding: %d' % self.index_encoding_num)
    self.possibly_language = raw[32:36]
    self.num_index_entries = read_u32(raw, 36)
    self.ordt_start = read_u32(raw, 40)
    self.ligt_start = read_u32(raw, 44)
    self.num_of_ligt_entries = read_u32(raw, 48)
    self.num_of_cncx_blocks = read_u32(raw, 52)
    self.unknown2 = raw[56:180]
    self.tagx_offset = read_u32(raw, 180)
    if self.tagx_offset != self.header_length:
        raise ValueError('TAGX offset and header length disagree')
    self.unknown3 = raw[184:self.header_length]

    # TAGX section: everything from the end of the header onwards.
    tagx = raw[self.header_length:]
    if not tagx.startswith(b'TAGX'):
        raise ValueError('Invalid TAGX section')
    self.tagx_header_length = read_u32(tagx, 4)
    self.tagx_control_byte_count = read_u32(tagx, 8)
    parsed_tags = parse_tagx_section(tagx)[1]
    self.tagx_entries = [TagX(*fields) for fields in parsed_tags]
    if self.tagx_entries and not self.tagx_entries[-1].is_eof:
        raise ValueError('TAGX last entry is not EOF')

    # Data following the TAGX section: a hex-encoded last entry number,
    # then a 16-bit entry count.
    idxt0_pos = self.header_length + self.tagx_header_length
    last_num, consumed = decode_hex_number(raw[idxt0_pos:])
    count_pos = idxt0_pos + consumed
    self.ncx_count = read_u16(raw, count_pos)
    self.last_entry = last_num
    if last_num != self.ncx_count - 1:
        raise ValueError('Last id number in the NCX != NCX count - 1')

    # Alignment zero bytes may sit between the end of the idxt0 data and
    # idxt_start, so the IDXT block is located through idxt_start.
    idxt = raw[self.idxt_start:]
    if idxt[:4] != b'IDXT':
        raise ValueError('Invalid IDXT header')
    length_check = read_u16(idxt, 4)
    if length_check != self.header_length + self.tagx_header_length:
        raise ValueError('Length check failed')