Пример #1
0
 def read_book(self):
     """Open ``self.filename`` with ``EPUB`` and keep its parsed book.

     The reader is always stored on ``self.book_ref``.  ``self.book`` is
     only assigned when ``read_epub()`` returns a truthy value; otherwise a
     'Cannot process' message is printed.  Returns ``None`` in both cases.
     """
     self.book_ref = EPUB(self.filename)
     if self.book_ref.read_epub():
         self.book = self.book_ref.book
     else:
         print('Cannot process: ' + self.filename)
Пример #2
0
class ParseMOBI:
    """Read a MOBI file by unpacking it with KindleUnpack and then handing
    the resulting epub archive to ``EPUB``."""

    def __init__(self, filename, temp_dir, file_md5):
        """Remember the input paths; nothing is read until ``read_book``.

        Args:
            filename: Path of the MOBI file to parse.
            temp_dir: Working directory, later passed to ``parse_chapters``.
            file_md5: Name of the sub-directory of ``temp_dir`` that the
                book is unpacked into.
        """
        self.book_ref = None
        self.book = None
        self.filename = filename
        self.epub_filepath = None
        self.split_large_xml = False
        self.temp_dir = temp_dir
        self.extract_dir = os.path.join(temp_dir, file_md5)

    def read_book(self):
        """Unpack the MOBI file and load the resulting epub.

        Returns:
            True when ``EPUB.read_epub()`` reports contents (``self.book`` is
            then populated), False otherwise.
        """
        # NOTE(review): HidePrinting presumably silences KindleUnpack's
        # console output - confirm against its definition.
        with HidePrinting():
            KindleUnpack.unpackBook(self.filename, self.extract_dir)

        epub_filename = os.path.splitext(os.path.basename(
            self.filename))[0] + '.epub'

        self.epub_filepath = os.path.join(self.extract_dir, 'mobi8',
                                          epub_filename)
        if not os.path.exists(self.epub_filepath):
            # No epub under 'mobi8': zip the 'mobi7' directory instead.
            # make_archive appends '.zip' to the base name and returns the
            # path of the archive it created.
            zip_dir = os.path.join(self.extract_dir, 'mobi7')
            zip_file = os.path.join(self.extract_dir, epub_filename)
            self.epub_filepath = shutil.make_archive(zip_file, 'zip', zip_dir)
            self.split_large_xml = True

        self.book_ref = EPUB(self.epub_filepath)
        contents_found = self.book_ref.read_epub()
        if not contents_found:
            return False
        self.book = self.book_ref.book
        return True

    def get_title(self):
        """Return the 'title' entry of the parsed book."""
        return self.book['title']

    def get_author(self):
        """Return the 'author' entry of the parsed book."""
        return self.book['author']

    def get_year(self):
        """Return the 'year' entry of the parsed book."""
        return self.book['year']

    def get_cover_image(self):
        """Return the 'cover' entry of the parsed book."""
        return self.book['cover']

    def get_isbn(self):
        """Return the 'isbn' entry of the parsed book."""
        return self.book['isbn']

    def get_tags(self):
        """Return the 'tags' entry of the parsed book."""
        return self.book['tags']

    def get_contents(self):
        """Extract the epub archive and parse its chapters.

        Returns:
            A tuple ``(book_list, file_settings)`` where ``book_list`` is
            ``self.book['book_list']`` and ``file_settings`` is
            ``{'images_only': False}``.
        """
        # Use a context manager so the archive handle is closed as soon as
        # extraction finishes instead of being left to garbage collection.
        with zipfile.ZipFile(self.epub_filepath) as archive:
            archive.extractall(self.extract_dir)

        self.book_ref.parse_chapters(temp_dir=self.temp_dir,
                                     split_large_xml=self.split_large_xml)
        file_settings = {'images_only': False}
        return self.book['book_list'], file_settings
Пример #3
0
    def read_book(self):
        """Unpack the MOBI file and wrap the resulting epub in ``EPUB``.

        The epub under ``<extract_path>/mobi8`` is used when it exists;
        otherwise the ``mobi7`` directory is zipped and that archive is used.
        The reader is stored on ``self.book``.
        """
        # NOTE(review): HidePrinting presumably silences KindleUnpack's
        # console output - confirm against its definition.
        with HidePrinting():
            KindleUnpack.unpackBook(self.filename, self.extract_path)

        stem, _extension = os.path.splitext(os.path.basename(self.filename))
        epub_filename = stem + '.epub'

        self.epub_filepath = os.path.join(
            self.extract_path, 'mobi8', epub_filename)
        mobi8_epub_present = os.path.exists(self.epub_filepath)

        if not mobi8_epub_present:
            # make_archive returns the path of the zip it wrote
            # (base name + '.zip').
            archive_base = os.path.join(self.extract_path, epub_filename)
            mobi7_dir = os.path.join(self.extract_path, 'mobi7')
            self.epub_filepath = shutil.make_archive(
                archive_base, 'zip', mobi7_dir)

        self.book = EPUB(self.epub_filepath, self.temp_dir)
Пример #4
0
class ParseEPUB:
    """Read an epub file through ``EPUB`` and expose its metadata and
    contents."""

    def __init__(self, filename, temp_dir, file_md5):
        """Remember the input paths; nothing is read until ``read_book``.

        Args:
            filename: Path of the epub file to parse.
            temp_dir: Parent directory for extracted files.
            file_md5: Name of the sub-directory of ``temp_dir`` that the
                archive is extracted into.
        """
        # TODO
        # Maybe also include book description
        self.book_ref = None
        self.book = None
        self.filename = filename
        self.extract_path = os.path.join(temp_dir, file_md5)

    def read_book(self):
        """Open the epub and keep its parsed book on ``self.book``.

        When ``read_epub()`` reports no contents a message is printed and
        ``self.book`` stays ``None``.  Returns ``None`` in both cases.
        """
        self.book_ref = EPUB(self.filename)
        contents_found = self.book_ref.read_epub()
        if not contents_found:
            print('Cannot process: ' + self.filename)
            return
        self.book = self.book_ref.book

    def get_title(self):
        """Return the 'title' entry of the parsed book."""
        return self.book['title']

    def get_author(self):
        """Return the 'author' entry of the parsed book."""
        return self.book['author']

    def get_year(self):
        """Return the 'year' entry of the parsed book."""
        return self.book['year']

    def get_cover_image(self):
        """Return the 'cover' entry of the parsed book."""
        return self.book['cover']

    def get_isbn(self):
        """Return the 'isbn' entry of the parsed book."""
        return self.book['isbn']

    def get_tags(self):
        """Return the 'tags' entry of the parsed book."""
        return self.book['tags']

    def get_contents(self):
        """Extract the epub archive, then parse its TOC and chapters.

        Returns:
            A tuple ``(book_list, file_settings)`` where ``book_list`` is
            ``self.book['book_list']`` and ``file_settings`` is
            ``{'images_only': False}``.
        """
        # Use a context manager so the archive handle is closed as soon as
        # extraction finishes instead of being left to garbage collection.
        with zipfile.ZipFile(self.filename) as archive:
            archive.extractall(self.extract_path)

        self.book_ref.parse_toc()
        self.book_ref.parse_chapters(temp_dir=self.extract_path)
        file_settings = {'images_only': False}
        return self.book['book_list'], file_settings
Пример #5
0
    def read_book(self):
        """Unpack the MOBI file and load the resulting epub.

        The epub under ``<extract_dir>/mobi8`` is used when it exists;
        otherwise the ``mobi7`` directory is zipped, that archive is used and
        ``self.split_large_xml`` is switched on.

        Returns:
            True when ``read_epub()`` reports contents (``self.book`` is then
            populated), False otherwise.
        """
        # NOTE(review): HidePrinting presumably silences KindleUnpack's
        # console output - confirm against its definition.
        with HidePrinting():
            KindleUnpack.unpackBook(self.filename, self.extract_dir)

        stem, _extension = os.path.splitext(os.path.basename(self.filename))
        epub_filename = stem + '.epub'

        self.epub_filepath = os.path.join(
            self.extract_dir, 'mobi8', epub_filename)
        if not os.path.exists(self.epub_filepath):
            # make_archive returns the path of the zip it wrote
            # (base name + '.zip').
            mobi7_dir = os.path.join(self.extract_dir, 'mobi7')
            archive_base = os.path.join(self.extract_dir, epub_filename)
            self.epub_filepath = shutil.make_archive(
                archive_base, 'zip', mobi7_dir)
            self.split_large_xml = True

        self.book_ref = EPUB(self.epub_filepath)
        if self.book_ref.read_epub():
            self.book = self.book_ref.book
            return True
        return False
Пример #6
0
class ParseEPUB:
    """Read an epub file through ``EPUB`` and expose its metadata, table of
    contents and content."""

    def __init__(self, filename, temp_dir, file_md5):
        """Remember the input paths; nothing is read until ``read_book``.

        Args:
            filename: Path of the epub file to parse.
            temp_dir: Working directory handed to ``EPUB``.
            file_md5: Name of the sub-directory of ``temp_dir`` that the
                archive is extracted into.
        """
        self.book = None
        self.filename = filename
        self.temp_dir = temp_dir
        self.extract_path = os.path.join(temp_dir, file_md5)

    def read_book(self):
        """Create the ``EPUB`` reader and keep it on ``self.book``."""
        self.book = EPUB(self.filename, self.temp_dir)

    def generate_metadata(self):
        """Have the reader build its metadata and return ``book.metadata``."""
        self.book.generate_metadata()
        return self.book.metadata

    def generate_content(self):
        """Extract the archive and build the TOC and content lists.

        Each entry of ``self.book.content`` is indexed as
        ``entry[0], entry[1], entry[2]``; the first two go into the TOC
        together with a 1-based position and the third into the content list.

        Returns:
            A tuple ``(toc, content, images_only)`` with ``images_only``
            always False.
        """
        # Use a context manager so the archive handle is closed as soon as
        # extraction finishes instead of being left to garbage collection.
        with zipfile.ZipFile(self.filename) as archive:
            archive.extractall(self.extract_path)

        self.book.generate_toc()
        self.book.generate_content()

        toc = []
        content = []
        for position, entry in enumerate(self.book.content, start=1):
            toc.append((entry[0], entry[1], position))
            content.append(entry[2])

        # Return toc, content, images_only
        return toc, content, False
Пример #7
0
class ParseMOBI:
    """Parse Amazon ebooks by using KindleUnpack to first create an epub and
    then reading that epub the usual way through ``EPUB``."""

    def __init__(self, filename, temp_dir, file_md5):
        """Remember the input paths; nothing is read until ``read_book``.

        Args:
            filename: Path of the MOBI file to parse.
            temp_dir: Working directory handed to ``EPUB``.
            file_md5: Name of the sub-directory of ``temp_dir`` that the
                book is unpacked into.
        """
        self.book = None
        self.filename = filename
        self.epub_filepath = None
        self.temp_dir = temp_dir
        self.extract_path = os.path.join(temp_dir, file_md5)

    def read_book(self):
        """Unpack the MOBI file and wrap the resulting epub in ``EPUB``.

        The epub under ``<extract_path>/mobi8`` is used when it exists;
        otherwise the ``mobi7`` directory is zipped and that archive is used.
        """
        # NOTE(review): HidePrinting presumably silences KindleUnpack's
        # console output - confirm against its definition.
        with HidePrinting():
            KindleUnpack.unpackBook(self.filename, self.extract_path)

        epub_filename = os.path.splitext(
            os.path.basename(self.filename))[0] + '.epub'
        self.epub_filepath = os.path.join(
            self.extract_path, 'mobi8', epub_filename)

        if not os.path.exists(self.epub_filepath):
            # make_archive appends '.zip' to the base name and returns the
            # path of the archive it created.
            zip_dir = os.path.join(self.extract_path, 'mobi7')
            zip_file = os.path.join(
                self.extract_path, epub_filename)
            self.epub_filepath = shutil.make_archive(zip_file, 'zip', zip_dir)

        self.book = EPUB(self.epub_filepath, self.temp_dir)

    def generate_metadata(self):
        """Have the reader build its metadata and return ``book.metadata``."""
        self.book.generate_metadata()
        return self.book.metadata

    def generate_content(self):
        """Extract the epub archive and build the TOC and content lists.

        Every TOC entry is given level 1; ``entry[1]`` of each item in
        ``self.book.content`` supplies the second TOC field and ``entry[2]``
        the content, with a 1-based position as the third TOC field.

        Returns:
            A tuple ``(toc, content, images_only)`` with ``images_only``
            always False.
        """
        # Use a context manager so the archive handle is closed as soon as
        # extraction finishes instead of being left to garbage collection.
        with zipfile.ZipFile(self.epub_filepath) as archive:
            archive.extractall(self.extract_path)

        self.book.generate_toc()
        self.book.generate_content()

        toc = []
        content = []
        for position, entry in enumerate(self.book.content, start=1):
            toc.append((1, entry[1], position))
            content.append(entry[2])

        # Return toc, content, images_only
        return toc, content, False
Пример #8
0
 def read_book(self):
     """Create the ``EPUB`` reader for this file and keep it on ``self.book``."""
     reader = EPUB(self.filename, self.temp_dir)
     self.book = reader