Exemplo n.º 1
0
    def __init__(self, path_to_folder):
        """Scan a folder for MOBI books and record an "author - title" line each.

        The folder is walked recursively.  Every file whose name ends in
        ``.mobi`` is parsed with ``Mobi``; its author and title are decoded,
        printed as ``"<author> - <title>"`` and passed to
        ``Files().save_file``.  A book that cannot be opened, parsed, decoded
        or saved is reported with ``print("pass", path)`` and skipped.

        Args:
            path_to_folder: Root directory to search; stored as ``self.path``.
        """
        self.path = path_to_folder

        files = []
        for root, _dirs, names in os.walk(self.path):
            for name in names:
                # Match on the suffix: a plain substring test would also pick
                # up names such as "notes.mobi.txt".
                if name.endswith('.mobi'):
                    files.append(os.path.join(root, name))

        for f in files:
            try:
                # Opening and parsing happen inside the ``try`` so that one
                # unreadable book does not abort the whole scan.
                book = Mobi(f)
                book.parse()

                title = book.title().decode()
                author = book.author().decode()

                result = author + ' - ' + title + "\n"
                print(result)

                w = Files()
                w.save_file(result)
            except Exception:
                # Best-effort batch job: report and continue, but let
                # KeyboardInterrupt / SystemExit propagate.
                print("pass", f)
Exemplo n.º 2
0
def readMobi(eBook):
    """Return the title and author of a MOBI book as a single string.

    The file at ``eBook`` is opened and parsed with ``Mobi``.  Title and
    author are decoded as UTF-8 and joined with one space, title first.
    """
    parsed = Mobi(eBook)
    parsed.parse()

    title_text = parsed.title().decode("utf-8")
    author_text = parsed.author().decode("utf-8")
    return " ".join((title_text, author_text))
Exemplo n.º 3
0
 def _getFallbackReader(self):
     """Return the secondary ``Mobi`` reader, creating it on first use.

     The reader is built from ``self.filePath`` and cached in
     ``self.bookFallback``.  If constructing or parsing it fails, the error
     is logged at ``xbmc.LOGERROR`` level and ``self.bookFallback`` is left
     unchanged (``None``), so the next call tries again.

     Returns:
         The parsed ``Mobi`` instance, or ``None`` when it could not be
         created.
     """
     if self.bookFallback is None:
         try:
             reader = Mobi(str(self.filePath))
             # Need to parse all the header data in the book
             reader.parse()
         except Exception:
             log("MobiEBook: Expected exception for secondary reader, book %s with error: %s" % (self.filePath, traceback.format_exc()), xbmc.LOGERROR)
         else:
             # Publish the reader only after parse() succeeded; otherwise a
             # half-initialised object would be cached and handed to callers.
             self.bookFallback = reader
     return self.bookFallback
Exemplo n.º 4
0
def read_mobi_data(nombre_comp):
    """Read the author and title of the MOBI file at ``nombre_comp``.

    Returns:
        ``[author, title, '.mobi']`` with both strings decoded as UTF-8.
        If anything raises while opening, parsing or decoding, the error is
        printed and ``[None, None, None]`` is returned instead.
    """
    try:
        mobi_book = Mobi(nombre_comp)
        mobi_book.parse()
        title = mobi_book.title().decode('utf-8')
        author = mobi_book.author().decode('utf-8')
    except Exception as err:
        print(os.path.join(nombre_comp) + ' ERROR:' + str(err))
        return [None] * 3
    return [author, title, '.mobi']
Exemplo n.º 5
0
    def extract_metadata_mobi(self, book):
        """Collect metadata from a MOBI file.

        Args:
            book: Value passed to ``Mobi`` to open the book.

        Returns:
            A list ``[title, author, cover_image, file_name, description,
            identifier, publisher, date, rights, tags]``.  ``title`` and
            ``author`` are decoded as UTF-8.  ``cover_image`` is image
            record 0.  ``description``, ``identifier``, ``publisher`` and
            ``tags`` come from EXTH records 103, 104, 101 and 105; each is
            ``None`` when its lookup raises ``KeyError``.  ``date`` and
            ``rights`` are always ``None``.
        """
        mobi = Mobi(book)
        mobi.parse()

        try:
            cover_image = mobi.readImageRecord(0)
        except KeyError:
            cover_image = None

        title = mobi.title().decode("utf-8")
        author = mobi.author().decode("utf-8")
        config = mobi.config

        def exth_text(record_id):
            # Raises KeyError when the EXTH section or the record is absent.
            return config['exth']['records'][record_id].decode("utf-8")

        try:
            description = self.stripTags(exth_text(103))
        except KeyError:
            description = None

        try:
            identifier = exth_text(104)
        except KeyError:
            identifier = None

        try:
            publisher = exth_text(101)
        except KeyError:
            publisher = None

        date = None
        rights = None

        try:
            ftags = exth_text(105)
        except KeyError:
            ftags = None
        else:
            # Normalise to comma-separated tags using the first delimiter
            # found; only one delimiter kind is ever rewritten.
            for delimiter in (":", ";"):
                if delimiter in ftags:
                    ftags = ftags.replace(delimiter, ",")
                    break

        return [
            title,
            author,
            cover_image,
            mobi.f.name,
            description,
            identifier,
            publisher,
            date,
            rights,
            ftags,
        ]
Exemplo n.º 6
0
 def collect_book_tokens(self, book_path):
     """Return the lower-cased word tokens found in a MOBI book's paragraphs.

     Each record of the parsed book is decoded as UTF-8 (undecodable bytes
     are replaced), parsed with BeautifulSoup's ``lxml`` parser, and the
     text of every ``<p>`` element is lower-cased and split with
     ``word_tokenize``.  Progress over the records is shown with ``tqdm``.

     Args:
         book_path: Value passed to ``Mobi`` to open the book.

     Returns:
         A flat list of tokens in document order.
     """
     book = Mobi(book_path)
     book.parse()

     all_words = []
     for record in tqdm(book, desc='record_no'):
         markup = record.decode('utf-8', 'replace')

         for paragraph in BeautifulSoup(markup, 'lxml').find_all('p'):
             all_words.extend(word_tokenize(paragraph.text.lower()))

     return all_words
Exemplo n.º 7
0
def read_mobi(filepath):
    """Load a MOBI book into a plain dictionary.

    Args:
        filepath: Value passed to ``Mobi`` to open the book.

    Returns:
        A dict with the keys ``'filepath'``, ``'full_text'`` (all records
        joined with single spaces), ``'title'`` and ``'author'`` (both
        decoded as UTF-8).
    """
    book = Mobi(filepath)
    book.parse()

    # Joining bytes with a str separator raises TypeError on Python 3, so
    # bytes records are decoded first; records that are already text are
    # kept as they are.
    # NOTE(review): UTF-8 with replacement is assumed here - confirm against
    # the encoding the book declares.
    records = [
        record.decode('utf-8', 'replace') if isinstance(record, bytes) else record
        for record in book
    ]

    full_text = ' '.join(records)
    title = book.title().decode('utf-8')
    author = book.author().decode('utf-8')

    doc = {
        'filepath': filepath,
        'full_text': full_text,
        'title': title,
        'author': author,
    }

    return doc
Exemplo n.º 8
0
def generate_mobi_name(in_filename,
                       template=Template(u'$author - $title.$extn')):
    """Build a sanitised file name for a MOBI book from its own metadata.

    The author comes from EXTH record 100.  The title comes from EXTH
    record 503 when present and non-empty, otherwise from the MOBI header's
    ``'Full Name'``.  Both are decoded with the encoding selected by the
    header's ``'text Encoding'`` code page (1252 when the header has none).

    Args:
        in_filename: Path of the book; assumed to already be in a usable
            encoding (ideally a Unicode string).  Its extension, without
            the leading period, becomes ``$extn``.
        template: ``string.Template`` with ``$author``, ``$title`` and
            ``$extn`` placeholders.

    Returns:
        The substituted template after passing through ``safe_filename``.
    """
    book = Mobi(in_filename)
    book.parse()

    # The accessors return bytes; their results are deliberately discarded
    # and the raw records are read directly below.  The title accessor can
    # differ from the "503" entry, e.g. 'Broken Homes' versus
    # 'Broken Homes (PC Peter Grant Book 4)'.
    book.author()
    book.title()

    # Unclear whether this code page covers only the content or the
    # metadata (e.g. titles) as well.
    codepage = book.config.get('mobi', {}).get('text Encoding', 1252)
    codepage_to_encoding = {
        1252: 'windows-1252',
        65001: 'utf-8',
    }

    exth_records = book.config['exth']['records']
    raw_author = exth_records[100]
    # NOTE: if both exist, the longest one may be preferable.  Amazon books
    # seen so far consistently carry 503 (e.g. "SERIES Book X").
    raw_title = exth_records.get(503) or book.config['mobi'].get('Full Name')

    encoding = codepage_to_encoding[codepage]
    author = raw_author.decode(encoding)
    title = raw_title.decode(encoding)

    # Extension without its leading period.
    extn = os.path.splitext(in_filename)[1][1:]

    # TODO use a dict?
    substituted = template.substitute(author=author, title=title, extn=extn)
    return safe_filename(substituted)
Exemplo n.º 9
0
    def __init__(self, path_to_folder):
        """Rename every MOBI book under a folder to "author - title.mobi".

        The folder is walked recursively.  Every file whose name ends in
        ``.mobi`` is parsed with ``Mobi`` and renamed to
        ``"Kindle/<author> - <title>.mobi"``, a path relative to the current
        working directory.  A book that cannot be opened, parsed, decoded or
        renamed is reported with ``print("pass", path)`` and skipped.

        Args:
            path_to_folder: Root directory to search; stored as ``self.path``.
        """
        self.path = path_to_folder

        files = []
        for root, _dirs, names in os.walk(self.path):
            for name in names:
                # Match on the suffix: a plain substring test would also pick
                # up (and then rename) files such as "notes.mobi.txt".
                if name.endswith('.mobi'):
                    files.append(os.path.join(root, name))

        for f in files:
            try:
                # Opening and parsing happen inside the ``try`` so that one
                # unreadable book does not abort the whole batch.
                book = Mobi(f)
                book.parse()

                title = book.title().decode()
                author = book.author().decode()

                new_name = "Kindle/" + author + " - " + title + ".mobi"
                os.rename(f, new_name)
            except Exception:
                # Best-effort batch job: report and continue, but let
                # KeyboardInterrupt / SystemExit propagate.
                print("pass", f)
Exemplo n.º 10
0
from mobi import Mobi
import os
import pprint

# Locate the sample book relative to this script's own directory.
# abspath() matters: where __file__ is a bare relative name, dirname()
# returns '' and "<path>/test/..." would point at the filesystem root.
path = os.path.dirname(os.path.abspath(__file__))
book = Mobi(os.path.join(path, "test", "CharlesDarwin.mobi"))
book.parse()

# Dump every record in order, then the parsed header configuration.
for record in book:
    print(record)

pprint.pprint(book.config)
Exemplo n.º 11
0
from mobi import Mobi
import pprint

# Open the sample book (path is relative to the working directory) and
# read its headers.
book = Mobi("test/CharlesDarwin.mobi")
book.parse()

# Print each record in order.
for chunk in book:
    print(chunk)

# Finish with a pretty-printed dump of the parsed configuration.
print(pprint.pformat(book.config))
Exemplo n.º 12
0
 def setUp(self):
     """Create the ``Mobi`` fixture for ``self.path`` joined onto ``PATH``."""
     self.mobi = Mobi(pathjoin(PATH, self.path))
Exemplo n.º 13
0
from mobi import Mobi

# Hard-coded sample books; only `potter` is read below, `collins` is unused.
collins = "C:/Users/Anchpop/Google Drive/book/Calibre library/HarperCollins Publishers/Collins French to English (One Way) (115)/Collins French to English (One - HarperCollins Publishers.mobi"
potter = "C:/Users/Anchpop/Google Drive/book/Calibre library/J.K. Rowling/Harry Potter et la Coupe de Feu (La (110)/Harry Potter et la Coupe de Feu - J.K. Rowling.mobi"

# Open the selected book and read its headers.
book = Mobi(potter)
book.parse()

# Print every record that is not empty.
for record in filter(None, book):
    print(record)
Exemplo n.º 14
0
#!/usr/bin/env python2.7
from mobi import Mobi

# Open the file one directory up and read its headers.
book = Mobi("../zdic.prc");
book.parse();

# Python 2 print statement: the trailing comma suppresses the newline that
# would otherwise follow each record.
for record in book:
	print record,