Exemplo n.º 1
0
    def extract_content(self, output_dir):
        """Decompress all text records and write them out as an OEB book.

        Records 1..``self.header_record.num_records`` are decompressed one by
        one.  Each record is turned into a run of ``<p>`` paragraphs preceded
        by (or containing, at the matching position) a single
        ``<h1 class="chapter">`` heading for the corresponding entry of
        ``self.header_record.chapter_titles``.

        The resulting markup is written, UTF-8 encoded, to ``index.html``
        inside ``output_dir``, and ``opf_writer`` is called to produce
        ``metadata.opf`` next to it.

        :param output_dir: directory that receives ``index.html`` and
            ``metadata.opf``.
        :return: path of the generated ``metadata.opf``.
        """
        sections = []

        self.log.info(u'Decompressing text...')
        for i in range(1, self.header_record.num_records + 1):
            self.log.debug(u'\tDecompressing text section %i' % i)
            # Records are numbered from 1, chapter titles from 0.
            title = self.header_record.chapter_titles[i - 1]
            lines = []
            title_added = False
            for line in self.decompress_text(i).splitlines():
                line = fix_punct(line).strip()
                # The title test runs on the raw text; escaping happens
                # afterwards so that '&', '<' and '>' can never leak into
                # the markup, whichever branch is taken.
                escaped = prepare_string_for_xml(line)
                if not title_added and title in line:
                    # First line mentioning the chapter title becomes the
                    # heading.  It is emitted on its own rather than inside
                    # a <p>, since a paragraph may not contain a heading.
                    lines.append('<h1 class="chapter">' + escaped + '</h1>\n')
                    title_added = True
                else:
                    lines.append('<p>%s</p>' % escaped)
            if not title_added:
                # The title never showed up in the text: synthesise a heading.
                lines.insert(
                    0,
                    '<h1 class="chapter">'
                    + prepare_string_for_xml(title)
                    + '</h1>\n')
            sections.append('\n'.join(lines))

        # Sections are concatenated without a separator between them.
        txt = ''.join(sections)

        self.log.info(u'Converting text to OEB...')
        # NOTE(review): the book title is interpolated unescaped; whether that
        # is safe depends on where HTML_TEMPLATE places it -- confirm.
        html = HTML_TEMPLATE % (self.header_record.title, txt)
        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))

        mi = self.get_metadata()
        # index.html is the only manifest entry and the only spine entry.
        manifest = [('index.html', None)]
        spine = ['index.html']
        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)

        return os.path.join(output_dir, 'metadata.opf')
Exemplo n.º 2
0
    def extract_content(self, output_dir):
        """Convert the compressed text records into ``index.html`` + OPF.

        For every record number from 1 to ``self.header_record.num_records``
        the text is decompressed, split into lines, passed through
        ``fix_punct`` and stripped.  The first line that contains the
        record's chapter title is rendered as ``<h1 class="chapter">``; all
        other lines become ``<p>`` paragraphs.  If no line contains the
        title, a heading built from the title itself is placed first.

        All text placed into the markup is XML-escaped with
        ``prepare_string_for_xml``.

        :param output_dir: directory in which ``index.html`` (UTF-8) and
            ``metadata.opf`` are created.
        :return: the path to ``metadata.opf`` inside ``output_dir``.
        """

        def chapter_heading(text):
            # Escape here so both the "found in text" and the fallback
            # heading are safe against '&', '<' and '>' in the title.
            return (u'<h1 class="chapter">'
                    + prepare_string_for_xml(text)
                    + u'</h1>\n')

        chunks = []

        self.log.info('Decompressing text...')
        for i in range(1, self.header_record.num_records + 1):
            self.log.debug('\tDecompressing text section %i' % i)
            # chapter_titles is 0-based while record numbers start at 1.
            title = self.header_record.chapter_titles[i-1]
            lines = []
            title_added = False
            for raw in self.decompress_text(i).splitlines():
                line = fix_punct(raw)
                line = line.strip()
                if title_added or title not in line:
                    lines.append(u'<p>%s</p>' % prepare_string_for_xml(line))
                    continue
                # Heading goes out bare: <h1> is not allowed inside <p>,
                # and this matches what the fallback below produces.
                lines.append(chapter_heading(line))
                title_added = True
            if not title_added:
                lines.insert(0, chapter_heading(title))
            chunks.append('\n'.join(lines))

        # No separator between sections.
        txt = ''.join(chunks)

        self.log.info('Converting text to OEB...')
        # NOTE(review): header_record.title is inserted as-is; confirm that
        # HTML_TEMPLATE's first slot tolerates unescaped text.
        html = HTML_TEMPLATE % (self.header_record.title, txt)
        index_path = os.path.join(output_dir, 'index.html')
        with open(index_path, 'wb') as index:
            index.write(html.encode('utf-8'))

        mi = self.get_metadata()
        # Single-document book: one manifest item, one spine item.
        manifest = [('index.html', None)]
        spine = ['index.html']
        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)

        return os.path.join(output_dir, 'metadata.opf')