Exemplo n.º 1
0
    def __init__(self, fn):
        """Read book metadata from the file at path ``fn``.

        The first 68 bytes of the file are read and bytes 60-67 are taken
        as the file type.  Only 'BOOKMOBI' and 'TEXtREAd' are handled; for
        any other type, or when ``parse_palmdb`` raises, initialisation
        stops early and ``is_a_book`` stays False.

        Attributes always set: ``filename``, ``title``, ``author``,
        ``language``, ``description``, ``is_a_book``, ``type``.
        Set once the database has been parsed: ``records``.
        'BOOKMOBI' only: ``mobi`` (unpacked header fields) and, when the
        EXTH flag bit is set, ``exth`` and ``rawdata``.
        'TEXtREAd' only: ``rawdata`` and ``metadata``.
        """
        self.filename = fn

        # Set some fields to defaults
        self.title = fn
        self.author = "??"
        self.language = "??"
        # Rob Addition: Description
        self.description = ""
        self.is_a_book = False

        # The container is binary data, so read it in binary mode; the
        # context manager closes the handle even if the read fails.
        with open(fn, 'rb') as f:
            d = f.read(68)
        encodings = {1252: 'cp1252', 65001: 'utf-8'}
        supported_types = ('BOOKMOBI', 'TEXtREAd')
        self.type = d[60:68]

        if self.type not in supported_types:
            LOG(1, "Unsupported file type %s" % (self.type))
            return None

        try:
            db = parse_palmdb(fn)
        except Exception:
            # Best effort: an unparsable file is simply "not a book".
            return None

        self.is_a_book = True
        # now we have a better guess at the title, use it for now
        self.title = db.name

        self.records = db.records
        rec0 = self.records[0].data
        #LOG(5,repr(rec0))
        if self.type == 'BOOKMOBI':
            LOG(3, "This is a MOBI book")
            self.mobi = {}
            for field, pos, fmt in MOBI_HDR_FIELDS:
                end = pos + calcsize(fmt)
                # Skip fields that lie beyond record 0 or beyond the header
                # length (once that field has itself been unpacked).
                if (end > len(rec0) or ("header_len" in self.mobi
                                        and end > self.mobi["header_len"])):
                    continue
                LOG(
                    4, "field: %s, fmt: %s, @ [%d:%d], data: %s" %
                    (field, fmt, pos, end, repr(rec0[pos:end])))
                (self.mobi[field], ) = unpack(">%s" % fmt, rec0[pos:end])

            LOG(3, "self.mobi: %s" % repr(self.mobi))

            # Get and decode the book name
            if self.mobi['locale_language'] in LANGUAGES:
                lang = LANGUAGES[self.mobi['locale_language']]
                if self.mobi['locale_country'] == 0:
                    LOG(2, "Book language: %s" % lang[0][1])
                    self.language = "%s (%s)" % (lang[0][1], lang[0][0])
                elif self.mobi['locale_country'] in lang:
                    country = lang[self.mobi['locale_country']]
                    LOG(2,
                        "Book language is %s (%s)" % (lang[0][1], country[1]))
                    self.language = "%s (%s-%s)" % (lang[0][1], lang[0][0],
                                                    country[0])

            pos = self.mobi['full_name_offs']
            end = pos + self.mobi['full_name_len']
            self.title = rec0[pos:end].decode(encodings[self.mobi['encoding']])

            LOG(2, "Book name: %s" % self.title)

            if self.mobi['id'] != 'MOBI':
                LOG(0, "Mobi header missing!")
                return None

            if (0x40 & self.mobi['exth_flags']):  # check for EXTH
                self.exth = parse_exth(rec0, self.mobi['header_len'] + 16)
                LOG(3, "EXTH header: %s" % repr(self.exth))
                if 'author' in self.exth:
                    self.author = ' & '.join(self.exth['author'])
                else:
                    self.author = "n/a"
                # Only the 68 header bytes read above are kept here.
                self.rawdata = d

                # NOTE(review): when the value is a str, ' '.join() puts a
                # space between each of its characters -- confirm what
                # parse_exth returns for 'updated title'.
                if (('updated title' in self.exth)
                        and (type(self.exth['updated title']) is str)):
                    self.title = ' '.join(self.exth['updated title'])

                if 'description' in self.exth:
                    self.description = ' <P> '.join(self.exth['description'])

        elif self.type == 'TEXtREAd':
            LOG(2, "This is an older MOBI book")
            self.rawdata = d
            compression, data_len, rec_count, rec_size, pos = unpack(
                PRC_HDRFMT, rec0[:calcsize(PRC_HDRFMT)])
            LOG(
                3, "compression %d, data_len %d, rec_count %d, rec_size %d" %
                (compression, data_len, rec_count, rec_size))
            if compression == 2:
                data = uncompress(self.records[1].data)
            else:
                data = self.records[1].data
            from BeautifulSoup import BeautifulSoup
            soup = BeautifulSoup(data)

            self.metadata = soup.fetch("dc-metadata")
            try:
                self.title = soup.fetch("dc:title")[0].getText()
                self.author = soup.fetch("dc:creator")[0].getText()
                self.language = soup.fetch("dc:language")[0].getText()
            except Exception:
                # Any missing tag resets all three fields to fallbacks.
                self.title, self.author, self.language = ("Unknown", "Unknown",
                                                          "en-us")
            try:
                self.description = soup.fetch("dc:description")[0].getText()
            except Exception:
                # The description is optional; keep the default "".
                pass
Exemplo n.º 2
0
 def to_html(self):
     """Return the uncompressed data of the text records joined together.

     Records are taken from index 1 up to, but not including,
     ``self.mobi['first_image_idx']`` when the instance has its own
     ``mobi`` attribute; otherwise from index 1 up to, but not including,
     the last record.
     """
     if 'mobi' in self.__dict__:
         stop = self.mobi['first_image_idx']
     else:
         stop = -1
     pieces = []
     for record in self.records[1:stop]:
         pieces.append(uncompress(record.data))
     return ''.join(pieces)
Exemplo n.º 3
0
 def to_html(self):
     """Join the uncompressed data of records 1..N into one string.

     N is ``self.mobi['first_image_idx']`` (exclusive) if ``mobi`` is set
     on this instance; otherwise the slice stops before the last record.
     """
     has_mobi_header = 'mobi' in self.__dict__
     end_idx = self.mobi['first_image_idx'] if has_mobi_header else -1
     text_records = self.records[1:end_idx]
     return ''.join(uncompress(rec.data) for rec in text_records)
Exemplo n.º 4
0
    def __init__(self, fn):
        """Read book metadata from the file at path ``fn``.

        The first 68 bytes of the file are read and bytes 60-67 are taken
        as the file type.  Only 'BOOKMOBI' and 'TEXtREAd' are handled; for
        any other type, or when ``parse_palmdb`` raises, initialisation
        stops early and ``is_a_book`` stays False.

        Attributes always set: ``filename``, ``title``, ``author``,
        ``language``, ``is_a_book``, ``type``.
        Set once the database has been parsed: ``records``.
        'BOOKMOBI' only: ``mobi`` (unpacked header fields) and, when the
        EXTH flag bit is set, ``exth`` and ``rawdata``.
        'TEXtREAd' only: ``rawdata`` and ``metadata``.
        """
        self.filename = fn

        # Set some fields to defaults
        self.title = fn
        self.author = "??"
        self.language = "??"
        self.is_a_book = False

        # The container is binary data, so read it in binary mode; the
        # context manager closes the handle even if the read fails.
        with open(fn, 'rb') as f:
            d = f.read(68)
        encodings = {
                1252: 'cp1252',
                65001: 'utf-8'
                }
        supported_types = ('BOOKMOBI', 'TEXtREAd')
        self.type = d[60:68]

        if self.type not in supported_types:
            LOG(1, "Unsupported file type %s" % (self.type))
            return None

        try:
            db = parse_palmdb(fn)
        except Exception:
            # Best effort: an unparsable file is simply "not a book".
            return None

        self.is_a_book = True
        # now we have a better guess at the title, use it for now
        self.title = db.name

        self.records = db.records
        rec0 = self.records[0].data
        #LOG(5,repr(rec0))
        if self.type == 'BOOKMOBI':
            LOG(3, "This is a MOBI book")
            self.mobi = {}
            for field, pos, fmt in MOBI_HDR_FIELDS:
                end = pos + calcsize(fmt)
                # Skip fields that lie beyond record 0 or beyond the header
                # length (once that field has itself been unpacked).
                if (end > len(rec0) or
                    ("header_len" in self.mobi
                        and end > self.mobi["header_len"])):
                    continue
                LOG(4, "field: %s, fmt: %s, @ [%d:%d], data: %s" % (
                    field, fmt, pos, end, repr(rec0[pos:end])))
                (self.mobi[field], ) = unpack(">%s" % fmt, rec0[pos:end])

            LOG(3, "self.mobi: %s" % repr(self.mobi))

            # Get and decode the book name
            if self.mobi['locale_language'] in LANGUAGES:
                lang = LANGUAGES[self.mobi['locale_language']]
                if self.mobi['locale_country'] == 0:
                    LOG(2, "Book language: %s" % lang[0][1])
                    self.language = "%s (%s)" % (lang[0][1], lang[0][0])
                elif self.mobi['locale_country'] in lang:
                    country = lang[self.mobi['locale_country']]
                    LOG(2, "Book language is %s (%s)" % (
                        lang[0][1], country[1]))
                    self.language = "%s (%s-%s)" % (
                        lang[0][1],
                        lang[0][0],
                        country[0]
                        )

            pos = self.mobi['full_name_offs']
            end = pos + self.mobi['full_name_len']
            self.title = rec0[pos:end].decode(encodings[self.mobi['encoding']])

            LOG(2, "Book name: %s" % self.title)

            if self.mobi['id'] != 'MOBI':
                LOG(0, "Mobi header missing!")
                return None

            if (0x40 & self.mobi['exth_flags']):  # check for EXTH
                self.exth = parse_exth(rec0, self.mobi['header_len'] + 16)
                LOG(3, "EXTH header: %s" % repr(self.exth))
                if 'author' in self.exth:
                    self.author = ' & '.join(self.exth['author'])
                else:
                    self.author = "n/a"
                # Only the 68 header bytes read above are kept here.
                self.rawdata = d

                # NOTE(review): when the value is a str, ' '.join() puts a
                # space between each of its characters -- confirm what
                # parse_exth returns for 'updated title'.
                if (('updated title' in self.exth) and
                    (type(self.exth['updated title']) is str)):
                    self.title = ' '.join(self.exth['updated title'])

        elif self.type == 'TEXtREAd':
            LOG(2, "This is an older MOBI book")
            self.rawdata = d
            compression, data_len, rec_count, rec_size, pos = unpack(
                    PRC_HDRFMT, rec0[:calcsize(PRC_HDRFMT)])
            LOG(3, "compression %d, data_len %d, rec_count %d, rec_size %d" %
                    (compression, data_len, rec_count, rec_size))
            if compression == 2:
                data = uncompress(self.records[1].data)
            else:
                data = self.records[1].data
            from BeautifulSoup import BeautifulSoup
            soup = BeautifulSoup(data)

            self.metadata = soup.fetch("dc-metadata")
            try:
                self.title = soup.fetch("dc:title")[0].getText()
                self.author = soup.fetch("dc:creator")[0].getText()
                self.language = soup.fetch("dc:language")[0].getText()
            except Exception:
                # Any missing tag resets all three fields to fallbacks.
                self.title, self.author, self.language = ("Unknown", "Unknown",
                        "en-us")
Exemplo n.º 5
0
def uncompress_lz77(data):
    """Return ``data`` decompressed by ``lz77.uncompress``."""
    decoded = lz77.uncompress(data)
    return decoded