Exemplo n.º 1
0
    def __init__(self, path_to_folder):
        """Scan a folder tree for MOBI books and record "author - title" lines.

        Walks ``path_to_folder`` recursively and parses every file whose name
        ends in ``.mobi``.  For each book the line ``"<author> - <title>\\n"``
        is printed and passed to ``Files().save_file``.  A book whose metadata
        cannot be read, decoded or saved is reported with
        ``print("pass", path)`` and skipped.

        Errors raised while constructing or parsing the ``Mobi`` object are
        not caught here and propagate to the caller.

        Args:
            path_to_folder: Root directory to search; stored as ``self.path``.
        """
        self.path = path_to_folder

        # Match on the file extension only: a plain substring test would also
        # pick up names such as "notes.mobi.txt".
        book_paths = []
        for root, _dirs, names in os.walk(self.path):
            for name in names:
                if name.endswith('.mobi'):
                    book_paths.append(os.path.join(root, name))

        for book_path in book_paths:
            book = Mobi(book_path)
            book.parse()
            try:
                title = book.title().decode()
                author = book.author().decode()

                result = author + ' - ' + title + "\n"
                print(result)

                writer = Files()
                writer.save_file(result)
            except Exception:
                # Best-effort: skip this book but keep going.  ``Exception``
                # (not a bare ``except``) lets Ctrl-C and SystemExit through.
                print("pass", book_path)
Exemplo n.º 2
0
def readMobi(eBook):
    """Return the book's title and author as one space-separated string.

    Args:
        eBook: Value handed to the ``Mobi`` constructor (the book to open).

    Returns:
        ``"<title> <author>"``, both decoded from UTF-8.
    """
    parsed = Mobi(eBook)
    parsed.parse()

    title = parsed.title().decode("utf-8")
    author = parsed.author().decode("utf-8")
    return " ".join((title, author))
Exemplo n.º 3
0
def read_mobi_data(nombre_comp):
    """Read author and title from a MOBI file.

    Args:
        nombre_comp: Value handed to the ``Mobi`` constructor (the book to
            open); also used as the prefix of the error message.

    Returns:
        ``[author, title, '.mobi']`` on success.  If anything raises an
        ``Exception`` while opening, parsing or decoding, the error is
        printed and ``[None, None, None]`` is returned instead.
    """
    metadata = [None, None, None]
    try:
        ebook = Mobi(nombre_comp)
        ebook.parse()
        title = ebook.title().decode('utf-8')
        author = ebook.author().decode('utf-8')
    except Exception as err:
        message = os.path.join(nombre_comp) + ' ERROR:' + str(err)
        print(message)
    else:
        metadata = [author, title, '.mobi']
    return metadata
Exemplo n.º 4
0
    def extract_metadata_mobi(self, book):
        """Collect metadata fields from a MOBI book.

        Args:
            book: Value handed to the ``Mobi`` constructor (the book to open).

        Returns:
            A 10-element list, in this order: title, author, cover image
            (image record 0), ``f.name`` of the parsed book, description
            (EXTH record 103, passed through ``self.stripTags``), identifier
            (EXTH 104), publisher (EXTH 101), date (always ``None``), rights
            (always ``None``) and tags (EXTH 105).  Any optional item whose
            lookup raises ``KeyError`` is ``None``.
        """
        parsed = Mobi(book)
        parsed.parse()

        try:
            cover_image = parsed.readImageRecord(0)
        except KeyError:
            cover_image = None

        title = parsed.title().decode("utf-8")
        author = parsed.author().decode("utf-8")
        config = parsed.config

        def exth_text(record_id):
            # Raises KeyError when the EXTH section or the record is absent.
            return config['exth']['records'][record_id].decode("utf-8")

        try:
            description = self.stripTags(exth_text(103))
        except KeyError:
            description = None

        try:
            identifier = exth_text(104)
        except KeyError:
            identifier = None

        try:
            publisher = exth_text(101)
        except KeyError:
            publisher = None

        # Not extracted from the book; kept so the result has a fixed layout.
        date = None
        rights = None

        try:
            ftags = exth_text(105)
        except KeyError:
            ftags = None
        else:
            # Normalise only the FIRST delimiter kind that is present to
            # commas, so a string is never split on several delimiters.
            # Whitespace is deliberately not treated as a delimiter.
            for delimiter in (":", ";"):
                if delimiter in ftags:
                    ftags = ftags.replace(delimiter, ",")
                    break

        return [
            title,
            author,
            cover_image,
            parsed.f.name,
            description,
            identifier,
            publisher,
            date,
            rights,
            ftags,
        ]
Exemplo n.º 5
0
def read_mobi(filepath):
    """Load a MOBI book into a plain dictionary.

    Args:
        filepath: Value handed to the ``Mobi`` constructor; echoed back
            unchanged under the ``'filepath'`` key.

    Returns:
        A dict with keys ``'filepath'``, ``'full_text'`` (every record
        obtained by iterating the parsed book, joined with single spaces),
        ``'title'`` and ``'author'`` (both decoded from UTF-8).
    """
    ebook = Mobi(filepath)
    ebook.parse()

    # NOTE(review): the join below assumes iterating the book yields str
    # records -- confirm against the Mobi implementation in use.
    chunks = list(ebook)
    text = ' '.join(chunks)

    return {
        'filepath': filepath,
        'full_text': text,
        'title': ebook.title().decode('utf-8'),
        'author': ebook.author().decode('utf-8'),
    }
Exemplo n.º 6
0
def generate_mobi_name(in_filename,
                       template=Template(u'$author - $title.$extn')):
    """Build a sanitised file name for a MOBI book from its own metadata.

    Args:
        in_filename: Path of the book.  Assumed to already be in a usable
            encoding (ideally a Unicode string).
        template: ``string.Template`` with ``$author``, ``$title`` and
            ``$extn`` placeholders.

    Returns:
        The substituted template passed through ``safe_filename``.

    Raises:
        KeyError: If the author record (EXTH 100) is missing, or the book's
            code page is neither 1252 nor 65001.
    """
    # MOBI header code page -> Python codec name.
    codepage_to_encoding = {
        1252: 'windows-1252',
        65001: 'utf-8',
    }

    book = Mobi(in_filename)
    book.parse()

    # NOTE: unclear whether this code page covers only the text content or
    # the metadata (titles, names) as well; it is applied to the metadata.
    book_codepage = book.config.get('mobi', {}).get('text Encoding', 1252)

    # The raw EXTH records are read directly instead of going through
    # book.author()/book.title(): the title can differ from record 503, e.g.
    # 'Broken Homes' vs 'Broken Homes (PC Peter Grant Book 4)'.
    records = book.config['exth']['records']
    raw_author = records[100]
    # NOTE: if both exist the longest one may be preferable.  So far every
    # Amazon book seen has consistently carried 503 ("SERIES Book X").
    raw_title = records.get(503) or book.config['mobi'].get('Full Name')

    encoding = codepage_to_encoding[book_codepage]
    author = raw_author.decode(encoding)
    title = raw_title.decode(encoding)

    # Extension without its leading period.
    extn = os.path.splitext(in_filename)[1][1:]

    # TODO use a dict?
    new_filename = template.substitute(author=author, title=title, extn=extn)
    return safe_filename(new_filename)
Exemplo n.º 7
0
    def __init__(self, path_to_folder):
        """Rename every MOBI book under a folder after its author and title.

        Walks ``path_to_folder`` recursively and parses every file whose name
        ends in ``.mobi``.  Each book is renamed to
        ``"Kindle/<author> - <title>.mobi"``.  A book whose metadata cannot
        be read or decoded, or whose rename fails, is reported with
        ``print("pass", path)`` and left where it is.

        Errors raised while constructing or parsing the ``Mobi`` object are
        not caught here and propagate to the caller.

        Args:
            path_to_folder: Root directory to search; stored as ``self.path``.
        """
        self.path = path_to_folder

        # Match on the file extension only: a plain substring test would also
        # pick up names such as "notes.mobi.txt".
        book_paths = []
        for root, _dirs, names in os.walk(self.path):
            for name in names:
                if name.endswith('.mobi'):
                    book_paths.append(os.path.join(root, name))

        for book_path in book_paths:
            book = Mobi(book_path)
            book.parse()
            try:
                title = book.title().decode()
                author = book.author().decode()

                # The target is relative to the current working directory;
                # os.rename does not create the "Kindle" directory, so a
                # missing directory lands in the handler below.
                new_name = "Kindle/" + author + " - " + title + ".mobi"
                os.rename(book_path, new_name)
            except Exception:
                # Best-effort: skip this book but keep going.  ``Exception``
                # (not a bare ``except``) lets Ctrl-C and SystemExit through.
                print("pass", book_path)