Exemplo n.º 1
0
def do_explode(path, dest):
    """Open the MOBI file at *path* and run the KF8 reader inside *dest*.

    Returns a ``(opf, obfuscated_fonts)`` tuple: the absolute form of the
    path returned by calling the ``Mobi8Reader`` instance, and that
    reader's ``encrypted_fonts`` attribute.
    """
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader

    with open(path, 'rb') as stream:
        mobi6 = MobiReader(stream, default_log, None, None)
        # NOTE(review): CurrentDir presumably switches the working directory
        # to dest for the duration of the block, so the abspath below is
        # resolved relative to dest -- confirm against its definition.
        with CurrentDir(dest):
            kf8 = Mobi8Reader(mobi6, default_log, for_tweak=True)
            relative_opf = kf8()
            result = (os.path.abspath(relative_opf), kf8.encrypted_fonts)

    return result
Exemplo n.º 2
0
def do_explode(path, dest):
    """Open the MOBI file at *path* and run the KF8 reader inside *dest*.

    Returns the absolute form of the path returned by calling the
    ``Mobi8Reader`` instance. Afterwards a ``debug-raw.html`` file in the
    working directory is removed if possible.
    """
    with open(path, 'rb') as stream:
        mr = MobiReader(stream, default_log, None, None)

        # NOTE(review): CurrentDir presumably makes dest the working
        # directory for this block -- confirm against its definition.
        with CurrentDir(dest):
            mr = Mobi8Reader(mr, default_log)
            opf = os.path.abspath(mr())
            # Best-effort cleanup: the file may not exist or may not be
            # removable. Only filesystem errors are ignored, so that
            # KeyboardInterrupt/SystemExit still propagate.
            try:
                os.remove('debug-raw.html')
            except OSError:
                pass

    return opf
Exemplo n.º 3
0
    def convert(self, stream, options, file_ext, log, accelerators):
        """Read a MOBI *stream* and write its content to the working directory.

        Sets ``self.is_kf8`` and ``self.mobi_is_joint`` according to the
        reader's ``kf8_type``. For KF8 input it also stores the reader's
        ``encrypted_fonts`` on ``self.encrypted_fonts``.

        Parameters:
            stream: file-like object passed straight to ``MobiReader``.
            options: object providing ``input_encoding`` and
                ``debug_pipeline``.
            file_ext: unused by this method.
            log: callable logger; also passed to the readers.
            accelerators: dict; for non-KF8 input the ``'pagebreaks'`` key is
                set to an XPath expression selecting page-break divs.

        Returns:
            For KF8 input, the absolute form of the path returned by calling
            the ``Mobi8Reader``; otherwise ``mr.created_opf_path``.
        """
        self.is_kf8 = False
        self.mobi_is_joint = False

        from calibre.ebooks.mobi.reader.mobi6 import MobiReader
        from lxml import html
        parse_cache = {}
        try:
            mr = MobiReader(stream, log, options.input_encoding,
                            options.debug_pipeline)
            if mr.kf8_type is None:
                mr.extract_content('.', parse_cache)

        except Exception:
            # First attempt failed: retry once with try_extra_data_fix=True.
            # Only Exception subclasses trigger the retry, so that
            # KeyboardInterrupt/SystemExit are not swallowed.
            mr = MobiReader(stream,
                            log,
                            options.input_encoding,
                            options.debug_pipeline,
                            try_extra_data_fix=True)
            if mr.kf8_type is None:
                mr.extract_content('.', parse_cache)

        if mr.kf8_type is not None:
            # KF8 content is handled entirely by Mobi8Reader.
            log('Found KF8 MOBI of type %r' % mr.kf8_type)
            if mr.kf8_type == 'joint':
                self.mobi_is_joint = True
            from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
            mr = Mobi8Reader(mr, log)
            opf = os.path.abspath(mr())
            self.encrypted_fonts = mr.encrypted_fonts
            self.is_kf8 = True
            return opf

        # Pop the raw markup entry first so the loop below only sees the
        # parsed trees left in parse_cache.
        raw = parse_cache.pop('calibre_raw_mobi_markup', False)
        if raw:
            if isinstance(raw, unicode_type):
                raw = raw.encode('utf-8')
            with lopen('debug-raw.html', 'wb') as f:
                f.write(raw)
        from calibre.ebooks.oeb.base import close_self_closing_tags
        # Serialize every remaining cached tree to the file named by its key.
        for f, root in parse_cache.items():
            raw = html.tostring(root,
                                encoding='utf-8',
                                method='xml',
                                include_meta_content_type=False)
            raw = close_self_closing_tags(raw)
            with lopen(f, 'wb') as q:
                q.write(raw)
        accelerators['pagebreaks'] = '//h:div[@class="mbp_pagebreak"]'
        return mr.created_opf_path
Exemplo n.º 4
0
def do_explode(path, dest):
    """Open the MOBI file at *path* and run the KF8 reader inside *dest*.

    Returns a ``(opf, obfuscated_fonts)`` tuple: the absolute form of the
    path returned by calling the ``Mobi8Reader`` instance, and that
    reader's ``encrypted_fonts`` attribute. Afterwards a ``debug-raw.html``
    file in the working directory is removed if possible.
    """
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
    with open(path, 'rb') as stream:
        mr = MobiReader(stream, default_log, None, None)

        # NOTE(review): CurrentDir presumably makes dest the working
        # directory for this block -- confirm against its definition.
        with CurrentDir(dest):
            mr = Mobi8Reader(mr, default_log)
            opf = os.path.abspath(mr())
            obfuscated_fonts = mr.encrypted_fonts
            # Best-effort cleanup: the file may not exist or may not be
            # removable. Only filesystem errors are ignored, so that
            # KeyboardInterrupt/SystemExit still propagate.
            try:
                os.remove('debug-raw.html')
            except OSError:
                pass

    return opf, obfuscated_fonts
Exemplo n.º 5
0
def parse_mobi(pathtoebook, book_fmt):
    """Yield ``(position, text)`` for each text run between HTML tags.

    *position* is the byte offset of the text inside the extracted markup
    and *text* is the matched bytes without the surrounding ``>`` and ``<``.
    When *book_fmt* is ``'AZW3'`` the markup is the concatenation of the
    parts built by ``Mobi8Reader``; otherwise it is the reader's
    ``mobi_html``.
    """
    reader = MobiReader(pathtoebook, default_log)
    is_azw3 = book_fmt == 'AZW3'

    # use code from calibre.ebooks.mobi.reader.mobi8:Mobi8Reader.__call__
    if is_azw3 and reader.kf8_type == 'joint':
        first_section = reader.kf8_boundary + 2
    else:
        first_section = 1

    reader.extract_text(offset=first_section)
    markup = reader.mobi_html

    if is_azw3:
        kf8 = Mobi8Reader(reader, default_log)
        kf8.kf8_sections = reader.sections[first_section - 1:]
        kf8.read_indices()
        kf8.build_parts()
        markup = b''.join(kf8.parts)

    # match text between HTML tags
    for hit in re.finditer(b'>[^<>]+<', markup):
        text = hit.group(0)[1:-1]
        yield (hit.start() + 1, text)
Exemplo n.º 6
0
def do_explode(path, dest):
    """Open the MOBI file at *path* and run the KF8 reader inside *dest*.

    Returns a ``(opf, obfuscated_fonts)`` tuple: the absolute form of the
    path returned by calling the ``Mobi8Reader`` instance, and that
    reader's ``encrypted_fonts`` attribute.

    After extraction, if every file in the ``images`` directory ends in
    ``.unknown`` those files are deleted, and a ``debug-raw.html`` file in
    the working directory is removed if possible.
    """
    from calibre.ebooks.mobi.reader.mobi6 import MobiReader
    from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
    with open(path, 'rb') as stream:
        mr = MobiReader(stream, default_log, None, None)

        # NOTE(review): CurrentDir presumably makes dest the working
        # directory for this block -- confirm against its definition.
        with CurrentDir(dest):
            mr = Mobi8Reader(mr, default_log)
            opf = os.path.abspath(mr())
            obfuscated_fonts = mr.encrypted_fonts
            # If there are no images then the azw3 input plugin dumps all
            # binary records as .unknown images, remove them
            if os.path.isdir('images'):
                files = os.listdir('images')
                if all(name.endswith('.unknown') for name in files):
                    for name in files:
                        os.remove(os.path.join('images', name))
            # Best-effort cleanup: the file may not exist or may not be
            # removable. Only filesystem errors are ignored, so that
            # KeyboardInterrupt/SystemExit still propagate.
            try:
                os.remove('debug-raw.html')
            except OSError:
                pass

    return opf, obfuscated_fonts
Exemplo n.º 7
0
def parse_mobi(book_path):
    """Yield ``(text, position)`` for each text run between HTML tags.

    *text* is the matched bytes decoded as UTF-8, without the surrounding
    ``>`` and ``<``; *position* is the byte offset of the text inside the
    extracted markup. For a ``'standalone'`` KF8 book the markup is the
    concatenation of the parts built by ``Mobi8Reader``; otherwise it is
    the reader's ``mobi_html``.

    Raises ``Exception('JointMOBI')`` when the reader reports a ``'joint'``
    KF8 type.
    """
    # use code from calibre.ebooks.mobi.reader.mobi8:Mobi8Reader.__call__
    # and calibre.ebook.conversion.plugins.mobi_input:MOBIInput.convert
    # https://github.com/kevinhendricks/KindleUnpack/blob/master/lib/mobi_k8proc.py#L216
    try:
        reader = MobiReader(book_path, default_log)
    except Exception:
        # Second attempt with the extra-data fix enabled
        reader = MobiReader(book_path, default_log, try_extra_data_fix=True)

    if reader.kf8_type == 'joint':
        raise Exception('JointMOBI')

    reader.check_for_drm()
    reader.extract_text()
    markup = reader.mobi_html

    if reader.kf8_type == 'standalone':
        kf8 = Mobi8Reader(reader, default_log)
        kf8.kf8_sections = reader.sections
        kf8.read_indices()
        kf8.build_parts()
        markup = b''.join(kf8.parts)

    # match text between HTML tags
    for hit in re.finditer(b'>[^<>]+<', markup):
        inner = hit.group(0)[1:-1]
        yield (inner.decode('utf-8'), hit.start() + 1)