def __init__(self, mf):
    """Set up this object from the parsed container ``mf``.

    Stores ``mf``, records the resource ranges taken from the headers,
    wraps the text records in ``TextRecord`` objects, joins their raw
    bytes into ``self.raw_text`` and then runs the processing steps
    (``extract_resources``, ``read_fdst``, ``read_indices``,
    ``build_files``, ``read_tbs``) in that order.

    :param mf: Object exposing ``mobi_header``, ``mobi8_header``,
        ``kf8_type``, ``records`` and ``decompress8``.
    """
    self.mf = mf
    mobi6_header, kf8_header = mf.mobi_header, mf.mobi8_header

    # The text records are sliced starting at index 1 (plus any offset).
    first_text_record = 1
    kf8_offset = 0

    # Each entry is (first_resource_record, last_resource_record,
    # first_image_index) as reported by one header.
    self.resource_ranges = [(
        kf8_header.first_resource_record,
        kf8_header.last_resource_record,
        kf8_header.first_image_index,
    )]

    if mf.kf8_type == 'joint':
        # For a 'joint' file the text slice is shifted by the EXTH
        # kf8_header_index, and the range from ``mobi_header`` is placed
        # ahead of the one from ``mobi8_header``.
        kf8_offset = mobi6_header.exth.kf8_header_index
        self.resource_ranges.insert(0, (
            mobi6_header.first_resource_record,
            mobi6_header.last_resource_record,
            mobi6_header.first_image_index,
        ))

    text_start = first_text_record + kf8_offset
    text_stop = text_start + kf8_header.number_of_text_records

    # TextRecord indices are relative to the slice (they start at 0).
    wrapped = []
    for position, record in enumerate(mf.records[text_start:text_stop]):
        wrapped.append(TextRecord(
            position, record, kf8_header.extra_data_flags, mf.decompress8))
    self.text_records = wrapped

    self.raw_text = b''.join(text.raw for text in self.text_records)
    self.header = self.mf.mobi8_header

    # The steps below run in this fixed order; keep it unchanged.
    self.extract_resources(mf.records)
    self.read_fdst()
    self.read_indices()
    self.build_files()
    self.read_tbs()
def __init__(self, mf):
    """Set up this object from the parsed container ``mf``.

    Stores ``mf``, slices out the resource records and the text records,
    joins the raw text bytes into ``self.raw_text`` and then runs the
    processing steps (``extract_resources``, ``read_fdst``,
    ``read_indices``, ``build_files``, ``read_tbs``) in that order.

    :param mf: Object exposing ``mobi_header``, ``mobi8_header``,
        ``kf8_type``, ``records`` and ``decompress8``.
    """
    self.mf = mf
    mobi6_header, kf8_header = mf.mobi_header, mf.mobi8_header

    # The text records are sliced starting at index 1 (plus any offset).
    first_text_record = 1
    kf8_offset = 0
    resources_stop = len(mf.records)

    if mf.kf8_type == 'joint':
        # For a 'joint' file the text slice is shifted by the EXTH
        # kf8_header_index and the resource slice ends one record before
        # that index.
        kf8_offset = mobi6_header.exth.kf8_header_index
        resources_stop = kf8_offset - 1

    resources_start = mobi6_header.first_non_book_record
    self.resource_records = mf.records[resources_start:resources_stop]

    text_start = first_text_record + kf8_offset
    text_stop = text_start + kf8_header.number_of_text_records

    # TextRecord indices are relative to the slice (they start at 0).
    wrapped = []
    for position, record in enumerate(mf.records[text_start:text_stop]):
        wrapped.append(TextRecord(
            position, record, kf8_header.extra_data_flags, mf.decompress8))
    self.text_records = wrapped

    self.raw_text = b''.join(text.raw for text in self.text_records)
    self.header = self.mf.mobi8_header

    # The steps below run in this fixed order; keep it unchanged.
    self.extract_resources()
    self.read_fdst()
    self.read_indices()
    self.build_files()
    self.read_tbs()
def __init__(self, mf):
    """Build the index, text and resource record views from ``mf``.

    Copies the basic container attributes from ``mf``, parses the primary
    and secondary index records when the header points at them, wraps the
    text records, and sorts every remaining resource record into
    ``image_records``, ``font_records`` or ``binary_records``.

    ``self.tbs_indexing`` is only assigned when a primary index record
    was found.

    :param mf: Object exposing ``raw``, ``palmdb``, ``record_headers``,
        ``records``, ``mobi_header``, ``huffman_record_nums`` and
        ``decompress6``.
    """
    for x in ('raw', 'palmdb', 'record_headers', 'records', 'mobi_header',
              'huffman_record_nums',):
        setattr(self, x, getattr(mf, x))

    # --- Primary index -------------------------------------------------
    self.index_header = self.index_record = None
    self.indexing_record_nums = set()
    pir = getattr(self.mobi_header, 'primary_index_record', NULL_INDEX)
    if pir != NULL_INDEX:
        self.index_header = IndexHeader(self.records[pir])
        numi = self.index_header.index_count
        # Record layout used here: header record at ``pir``, then ``numi``
        # index records, then ``num_of_cncx_blocks`` CNCX records.
        cncx_start = pir + 1 + numi
        cncx_end = cncx_start + self.index_header.num_of_cncx_blocks
        self.cncx = CNCX(self.records[cncx_start:cncx_end],
                         self.index_header.index_encoding)
        self.index_record = IndexRecord(self.records[pir+1:pir+1+numi],
                                        self.index_header, self.cncx)
        self.indexing_record_nums = set(range(
            pir, pir + 1 + numi + self.index_header.num_of_cncx_blocks))

    # --- Secondary index -----------------------------------------------
    self.secondary_index_record = self.secondary_index_header = None
    sir = self.mobi_header.secondary_index_record
    if sir != NULL_INDEX:
        self.secondary_index_header = SecondaryIndexHeader(self.records[sir])
        numi = self.secondary_index_header.index_count
        self.indexing_record_nums.add(sir)
        # NOTE(review): ``self.cncx`` is only assigned when a primary
        # index exists; a file with a secondary index but no primary one
        # would raise AttributeError here -- confirm whether that case
        # can occur.
        self.secondary_index_record = IndexRecord(
            self.records[sir+1:sir+1+numi], self.secondary_index_header,
            self.cncx)
        self.indexing_record_nums |= set(range(sir+1, sir+1+numi))

    # --- Text records --------------------------------------------------
    ntr = self.mobi_header.number_of_text_records
    fii = self.mobi_header.first_image_index
    self.text_records = [
        TextRecord(r, self.records[r], self.mobi_header.extra_data_flags,
                   mf.decompress6)
        for r in range(1, min(len(self.records), ntr+1))]

    # --- Resource records ----------------------------------------------
    self.image_records, self.binary_records = [], []
    self.font_records = []

    # Records whose first four bytes match one of these are never probed
    # as images. Built once instead of on every loop iteration.
    skip_probe_signatures = frozenset((
        b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n', b'RESC', b'BOUN',
        b'FDST', b'DATP', b'AUDI', b'VIDE', b'FONT', b'CRES', b'CONT',
        b'CMET'))

    image_index = 0
    for i in range(self.mobi_header.first_resource_record,
                   min(self.mobi_header.last_resource_record,
                       len(self.records))):
        if i in self.indexing_record_nums or i in self.huffman_record_nums:
            continue
        # Incremented for every non-skipped record, not only for images.
        image_index += 1
        r = self.records[i]
        signature = r.raw[:4]
        fmt = None
        if i >= fii and signature not in skip_probe_signatures:
            try:
                fmt = what(None, r.raw)
            except Exception:
                # Best effort: a record that cannot be probed is treated
                # as non-image data. Unlike a bare ``except``, this lets
                # KeyboardInterrupt/SystemExit propagate.
                pass
        if fmt is not None:
            self.image_records.append(ImageRecord(image_index, r, fmt))
        elif signature == b'FONT':
            self.font_records.append(FontRecord(i, r))
        else:
            self.binary_records.append(BinaryRecord(i, r))

    if self.index_record is not None:
        self.tbs_indexing = TBSIndexing(self.text_records,
                                        self.index_record.indices,
                                        self.mobi_header.type_raw)