def extract_content(self, output_dir):
    """Decompress all text records and write them out as HTML plus an OPF.

    Text records 1..``header_record.num_records`` are decompressed in
    order.  Each record is paired with ``header_record.chapter_titles[i - 1]``:
    the first line of the record that contains the title becomes the
    chapter heading; if no line contains it, a heading built from the
    title is inserted at the top of the record.  Every line is passed
    through ``fix_punct`` and stripped before being emitted.

    The resulting markup is substituted into ``HTML_TEMPLATE`` and written,
    UTF-8 encoded, to ``index.html`` inside *output_dir*; ``opf_writer`` is
    then called to produce ``metadata.opf`` next to it.

    :param output_dir: directory that receives ``index.html`` and
        ``metadata.opf``.
    :return: path of the written ``metadata.opf``.
    """
    sections = []

    self.log.info(u'Decompressing text...')
    for i in range(1, self.header_record.num_records + 1):
        self.log.debug(u'\tDecompressing text section %i' % i)
        title = self.header_record.chapter_titles[i - 1]
        lines = []
        title_added = False
        for line in self.decompress_text(i).splitlines():
            line = fix_punct(line).strip()
            if not title_added and title in line:
                # Match against the raw text, then escape it: the heading
                # text comes from the book and may contain '&' or '<',
                # which would otherwise corrupt the generated markup.
                line = ('<h1 class="chapter">'
                        + prepare_string_for_xml(line) + '</h1>\n')
                title_added = True
            else:
                line = prepare_string_for_xml(line)
            # NOTE(review): the heading line is also wrapped in <p>, as it
            # always has been; kept so the output structure is unchanged.
            lines.append('<p>%s</p>' % line)
        if not title_added:
            # No line carried the title, so synthesize a heading (escaped
            # for the same reason as above).
            lines.insert(0, '<h1 class="chapter">'
                         + prepare_string_for_xml(title) + '</h1>\n')
        sections.append('\n'.join(lines))

    # Sections are concatenated with no separator between them.
    txt = ''.join(sections)

    self.log.info(u'Converting text to OEB...')
    # NOTE(review): header_record.title is substituted as-is; confirm
    # whether it is already XML-safe.
    html = HTML_TEMPLATE % (self.header_record.title, txt)
    with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
        index.write(html.encode('utf-8'))

    mi = self.get_metadata()
    manifest = [('index.html', None)]
    spine = ['index.html']
    opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)

    return os.path.join(output_dir, 'metadata.opf')
def extract_content(self, output_dir):
    """Write the book text to ``index.html`` and generate ``metadata.opf``.

    For each record number ``i`` from 1 to ``header_record.num_records``
    the text returned by ``decompress_text(i)`` is split into lines, each
    line is run through ``fix_punct`` and stripped, and then wrapped in a
    paragraph.  The first line containing the matching entry of
    ``header_record.chapter_titles`` (index ``i - 1``) is marked up as the
    chapter heading; when no line contains it, a heading made from the
    title itself is placed before the record's lines.

    The combined markup is fed into ``HTML_TEMPLATE`` together with
    ``header_record.title``, encoded as UTF-8 and written to
    ``index.html`` in *output_dir*.  ``opf_writer`` is then invoked with a
    one-item manifest and spine.

    :param output_dir: destination directory for the generated files.
    :return: the path to ``metadata.opf`` inside *output_dir*.
    """
    chunks = []

    self.log.info('Decompressing text...')
    for i in range(1, self.header_record.num_records + 1):
        self.log.debug('\tDecompressing text section %i' % i)
        title = self.header_record.chapter_titles[i-1]
        lines = []
        title_added = False
        for line in self.decompress_text(i).splitlines():
            line = fix_punct(line)
            line = line.strip()
            if not title_added and title in line:
                # The title test uses the unescaped text; the text is then
                # escaped before being embedded, because book content can
                # contain '&' or '<' that would break the markup.
                line = (u'<h1 class="chapter">'
                        + prepare_string_for_xml(line) + u'</h1>\n')
                title_added = True
            else:
                line = prepare_string_for_xml(line)
            # NOTE(review): heading lines end up inside <p> as well; left
            # as-is to keep the generated structure unchanged.
            lines.append(u'<p>%s</p>' % line)
        if not title_added:
            # Title never appeared in the record: prepend an escaped
            # heading built from the title alone.
            lines.insert(0, u'<h1 class="chapter">'
                         + prepare_string_for_xml(title) + u'</h1>\n')
        chunks.append('\n'.join(lines))

    # Records are joined back to back, without a separator.
    txt = ''.join(chunks)

    self.log.info('Converting text to OEB...')
    # NOTE(review): header_record.title is inserted without escaping here;
    # verify whether it is guaranteed to be XML-safe.
    html = HTML_TEMPLATE % (self.header_record.title, txt)
    with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
        index.write(html.encode('utf-8'))

    mi = self.get_metadata()
    manifest = [('index.html', None)]
    spine = ['index.html']
    opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)

    return os.path.join(output_dir, 'metadata.opf')