Python MetaInformation.rating示例

 def setUp(self):
     """Create a temporary library and add three test books to it.

     Two metadata records are built; the second one is passed twice to
     ``add_books`` (with ``add_duplicates=True``).  All three books share
     the same small ``test.txt`` file object as their 'txt' format.

     The records are kept on ``self.m1`` / ``self.m2`` for later use.

     NOTE(review): ``self.img`` is read for the first record's cover but is
     not assigned in this method -- presumably a class attribute; confirm.
     """
     self.tdir = PersistentTemporaryDirectory('_calibre_dbtest')
     self.db = LibraryDatabase2(self.tdir)

     m1 = MetaInformation('Test Ebook 1', ['Test Author 1'])
     m1.tags = ['tag1', 'tag2']
     m1.publisher = 'Test Publisher 1'
     m1.rating = 2
     m1.series = 'Test Series 1'
     m1.series_index = 3
     m1.author_sort = 'as1'
     m1.isbn = 'isbn1'
     m1.cover_data = ('jpg', self.img)

     m2 = MetaInformation('Test Ebook 2', ['Test Author 2'])
     m2.tags = ['tag3', 'tag4']
     m2.publisher = 'Test Publisher 2'
     m2.rating = 3
     m2.series = 'Test Series 2'
     m2.series_index = 1
     m2.author_sort = 'as1'
     m2.isbn = 'isbn1'

     # The file must stay open while add_books consumes it; the with-block
     # guarantees the handle is released afterwards instead of leaking.
     with open(os.path.join(self.tdir, 'test.txt'), 'w+b') as f:
         # Bytes literal: the file is opened in binary mode.
         f.write(b'test')
         paths = list(repeat(f, 3))
         formats = list(repeat('txt', 3))
         self.db.add_books(paths, formats, [m1, m2, m2],
                           add_duplicates=True)
     self.m1, self.m2 = m1, m2

示例#2

显示文件

文件： test.py 项目： AEliu/calibre

 def setUp(self):
     """Prepare a scratch library containing three books for the tests.

     ``m1`` is added once and ``m2`` twice (``add_duplicates=True``); each
     book gets the same ``test.txt`` file object as its 'txt' format.  Both
     metadata records are stored on the test case as ``self.m1``/``self.m2``.

     NOTE(review): ``self.img`` must already exist when this runs; it is not
     set here -- verify where it is defined.
     """
     self.tdir = PersistentTemporaryDirectory('_calibre_dbtest')
     self.db = LibraryDatabase2(self.tdir)

     m1 = MetaInformation('Test Ebook 1', ['Test Author 1'])
     m1.tags = ['tag1', 'tag2']
     m1.publisher = 'Test Publisher 1'
     m1.rating = 2
     m1.series = 'Test Series 1'
     m1.series_index = 3
     m1.author_sort = 'as1'
     m1.isbn = 'isbn1'
     m1.cover_data = ('jpg', self.img)

     m2 = MetaInformation('Test Ebook 2', ['Test Author 2'])
     m2.tags = ['tag3', 'tag4']
     m2.publisher = 'Test Publisher 2'
     m2.rating = 3
     m2.series = 'Test Series 2'
     m2.series_index = 1
     m2.author_sort = 'as1'
     m2.isbn = 'isbn1'

     book_path = os.path.join(self.tdir, 'test.txt')
     # Keep the handle open only for as long as add_books needs it, then
     # close it deterministically rather than relying on garbage collection.
     with open(book_path, 'w+b') as f:
         f.write(b'test')  # binary-mode file, so write bytes
         paths = list(repeat(f, 3))
         formats = list(repeat('txt', 3))
         self.db.add_books(paths, formats, [m1, m2, m2],
                           add_duplicates=True)
     self.m1, self.m2 = m1, m2

示例#3

显示文件

文件： library_thing.py 项目： 089git/calibre

def get_social_metadata(title, authors, publisher, isbn, username=None,
        password=None):
    """Look a book up on LibraryThing by ISBN and collect extra metadata.

    :param title: book title used to seed the result
    :param authors: author list used to seed the result
    :param publisher: accepted for interface compatibility; not used here
    :param isbn: ISBN to look up; when falsy no request is made
    :param username: passed to ``login``
    :param password: passed to ``login``
    :return: a ``MetaInformation`` built from ``title``/``authors``.  When the
        page could be fetched it may additionally carry a title and authors
        (only if they were empty), ``series``/``series_index`` and a
        ``rating`` in the range (0, 5].
    :raises Exception: if the response contains LibraryThing's verification
        page marker.
    """
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation(title, authors)
    if not isbn:
        return mi

    br = get_browser()
    try:
        login(br, username, password)

        raw = br.open_novisit('http://www.librarything.com/isbn/'
                    +isbn).read()
    except Exception:
        # Best effort: a failed login/request simply yields the bare record.
        # (Not a bare ``except`` so Ctrl-C / SystemExit still propagate.)
        return mi
    if '/wiki/index.php/HelpThing:Verify' in raw:
        raise Exception('LibraryThing is blocking calibre.')
    if not raw:
        return mi
    raw = raw.decode('utf-8', 'replace')
    raw = strip_encoding_declarations(raw)
    root = html.fromstring(raw)

    # Matches only text that float() can parse ("3", "1.5", ".5").  The
    # looser ``[0-9.]+`` also matched a lone "." (as in "Vol. 3") or
    # "1.2.3", which made float() raise ValueError.
    number_pat = re.compile(r'\d*\.?\d+')

    h1 = root.xpath('//div[@class="headsummary"]/h1')
    if h1 and not mi.title:
        mi.title = html.tostring(h1[0], method='text', encoding=unicode)
    h2 = root.xpath('//div[@class="headsummary"]/h2/a')
    if h2 and not mi.authors:
        mi.authors = [html.tostring(x, method='text', encoding=unicode) for
                x in h2]
    h3 = root.xpath('//div[@class="headsummary"]/h3/a')
    if h3:
        # Use the first link whose text looks like "Series name (position)".
        match = None
        for h in h3:
            series = html.tostring(h, method='text', encoding=unicode)
            match = re.search(r'(.+) \((.+)\)', series)
            if match is not None:
                break
        if match is not None:
            mi.series = match.group(1).strip()
            num = number_pat.search(match.group(2))
            # Default to 1.0 when the parenthesised part holds no number.
            mi.series_index = float(num.group()) if num is not None else 1.0
    # Tag scraping is currently disabled:
    #tags = root.xpath('//div[@class="tags"]/span[@class="tag"]/a')
    #if tags:
    #    mi.tags = [html.tostring(x, method='text', encoding=unicode) for x
    #            in tags]
    span = root.xpath(
            '//table[@class="wsltable"]/tr[@class="wslcontent"]/td[4]//span')
    if span:
        raw = html.tostring(span[0], method='text', encoding=unicode)
        num = number_pat.search(raw)
        if num is not None:
            rating = float(num.group())
            # Only accept values on the 0-5 scale; anything else is ignored.
            if 0 < rating <= 5:
                mi.rating = rating
    return mi

示例#4

显示文件

def _find_meta_field_(src, key, meta_names, backref_quotes=False):
    """Look up a single metadata field in *src*.

    The ``<!-- ... KEY="value" ... -->`` comment form is tried first.  If it
    is absent, every name in *meta_names* is tried in order using the pattern
    returned by ``get_meta_regexp_`` (group 1 of that pattern is the value).

    :param backref_quotes: when true the closing quote must be the same
        character as the opening one, so the value may contain the other
        quote character; otherwise the value may contain no quotes at all.
    :return: ``(value, from_comment)``; *value* is ``None`` if nothing matched
    """
    if backref_quotes:
        pat = r'<!--.*?%s=(?P<q>[\'"])(.+?)(?P=q).*?-->' % key
        group = 2
    else:
        pat = r'<!--.*?%s=[\'"]([^"\']+)[\'"].*?-->' % key
        group = 1
    match = re.search(pat, src, re.DOTALL)
    if match:
        return match.group(group), True
    for name in meta_names:
        match = get_meta_regexp_(name).search(src)
        if match:
            return match.group(1), False
    return None, False


def get_metadata_(src, encoding=None):
    """Extract book metadata from the text of an HTML document.

    :param src: document content; if it is not ``unicode`` it is decoded with
        *encoding*, or via ``xml_to_unicode`` when no encoding is given
    :param encoding: optional encoding used to decode a byte string *src*
    :return: a ``MetaInformation`` carrying whichever of title, author,
        publisher, ISBN, language, publication date, timestamp, series,
        series index, rating, comments and tags could be found

    Only the first 150000 characters are searched.  Values that fail to parse
    (dates, series index, rating) are silently skipped.
    """
    if not isinstance(src, unicode):
        if not encoding:
            src = xml_to_unicode(src)[0]
        else:
            src = src.decode(encoding, 'replace')

    # Meta data definitions as in
    # http://www.mobileread.com/forums/showpost.php?p=712544&postcount=9

    src = src[:150000]  # Searching shouldn't take too long

    # Title
    title = _find_meta_field_(
        src, 'TITLE', ('DC.title', 'DCTERMS.title', 'Title'),
        backref_quotes=True)[0]
    if not title:
        # Last resort: the document's <title> element.
        match = re.search('<title>([^<>]+?)</title>', src, re.IGNORECASE)
        if match:
            title = match.group(1)

    # Author
    author, from_comment = _find_meta_field_(
        src, 'AUTHOR',
        ('Author', 'DC.creator.aut', 'DCTERMS.creator.aut', 'DC.creator'),
        backref_quotes=True)
    if author and from_comment:
        # Only the comment form has its commas turned into semicolons.
        author = author.replace(',', ';')

    # Create MetaInformation with Title and Author
    ent_pat = re.compile(r'&(\S+)?;')
    if title:
        title = ent_pat.sub(entity_to_unicode, title)
    if author:
        author = ent_pat.sub(entity_to_unicode, author)
    mi = MetaInformation(title, [author] if author else None)

    # Publisher
    publisher = _find_meta_field_(
        src, 'PUBLISHER', ('Publisher', 'DC.publisher', 'DCTERMS.publisher'),
        backref_quotes=True)[0]
    if publisher:
        mi.publisher = ent_pat.sub(entity_to_unicode, publisher)

    # ISBN
    isbn = _find_meta_field_(
        src, 'ISBN',
        ('ISBN', 'DC.identifier.ISBN', 'DCTERMS.identifier.ISBN'))[0]
    if isbn:
        # Keep digits and the X check character only.
        mi.isbn = re.sub(r'[^0-9xX]', '', isbn)

    # LANGUAGE
    language = _find_meta_field_(
        src, 'LANGUAGE', ('DC.language', 'DCTERMS.language'))[0]
    if language:
        mi.language = language

    # PUBDATE
    pubdate = _find_meta_field_(
        src, 'PUBDATE',
        ('Pubdate', 'Date of publication', 'DC.date.published',
         'DC.date.publication', 'DC.date.issued', 'DCTERMS.issued'))[0]
    if pubdate:
        try:
            mi.pubdate = parse_date(pubdate)
        except Exception:
            # Unparseable date: leave pubdate unset.
            pass

    # TIMESTAMP
    timestamp = _find_meta_field_(
        src, 'TIMESTAMP',
        ('Timestamp', 'Date of creation', 'DC.date.created',
         'DC.date.creation', 'DCTERMS.created'))[0]
    if timestamp:
        try:
            mi.timestamp = parse_date(timestamp)
        except Exception:
            # Unparseable date: leave timestamp unset.
            pass

    # SERIES
    series = _find_meta_field_(src, 'SERIES', ('Series',))[0]
    if series:
        # A trailing "[n]" in the series text carries the series index.
        match = re.search(r'\[([.0-9]+)\]', series)
        series_index = None
        if match is not None:
            try:
                series_index = float(match.group(1))
            except ValueError:
                pass
            series = series.replace(match.group(), '').strip()

        mi.series = ent_pat.sub(entity_to_unicode, series)
        if series_index is None:
            # Fall back to a separate "Seriesnumber" entry.
            match = get_meta_regexp_("Seriesnumber").search(src)
            if match:
                try:
                    series_index = float(match.group(1))
                except ValueError:
                    pass
        if series_index is not None:
            mi.series_index = series_index

    # RATING
    rating = _find_meta_field_(src, 'RATING', ('Rating',))[0]
    if rating:
        try:
            value = float(rating)
        except ValueError:
            value = None
        if value is not None:
            if value < 0:
                value = 0
            if value > 5:
                # Values above 5 are halved (treated as a 0-10 scale).
                value /= 2.
            if value > 5:
                # Still out of range after halving: discard.
                value = 0
            mi.rating = value

    # COMMENTS
    comments = _find_meta_field_(src, 'COMMENTS', ('Comments',))[0]
    if comments:
        mi.comments = ent_pat.sub(entity_to_unicode, comments)

    # TAGS
    tags = _find_meta_field_(src, 'TAGS', ('Tags',))[0]
    if tags:
        mi.tags = [
            x.strip() for x in ent_pat.sub(entity_to_unicode, tags).split(",")
        ]

    # Ready to return MetaInformation
    return mi

示例#5

显示文件

def get_social_metadata(title,
                        authors,
                        publisher,
                        isbn,
                        username=None,
                        password=None):
    """Fetch series and rating data for a book from LibraryThing by ISBN.

    :param title: title used to create the returned record
    :param authors: authors used to create the returned record
    :param publisher: unused; kept so existing callers keep working
    :param isbn: ISBN to query; nothing is fetched when it is falsy
    :param username: forwarded to ``login``
    :param password: forwarded to ``login``
    :return: ``MetaInformation`` seeded with ``title``/``authors`` and, when
        the page was retrieved, filled in with a missing title/authors, the
        series with its index, and a rating within (0, 5]
    :raises Exception: when the response contains the LibraryThing
        verification-page marker
    """
    from calibre.ebooks.metadata import MetaInformation
    mi = MetaInformation(title, authors)
    if not isbn:
        return mi

    br = get_browser()
    try:
        login(br, username, password)

        raw = br.open_novisit('http://www.librarything.com/isbn/' +
                              isbn).read()
    except Exception:
        # Any login or network problem falls back to the unmodified record;
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return mi
    if '/wiki/index.php/HelpThing:Verify' in raw:
        raise Exception('LibraryThing is blocking calibre.')
    if not raw:
        return mi
    raw = raw.decode('utf-8', 'replace')
    raw = strip_encoding_declarations(raw)
    root = html.fromstring(raw)

    def first_number(text):
        # Return the first float-parseable number in *text*, or None.
        # ``[0-9.]+`` was too permissive: it matched "." or "1.2.3" and the
        # subsequent float() call raised ValueError.
        found = re.search(r'\d*\.?\d+', text)
        return None if found is None else float(found.group())

    h1 = root.xpath('//div[@class="headsummary"]/h1')
    if h1 and not mi.title:
        mi.title = html.tostring(h1[0], method='text', encoding=unicode)
    h2 = root.xpath('//div[@class="headsummary"]/h2/a')
    if h2 and not mi.authors:
        mi.authors = [
            html.tostring(x, method='text', encoding=unicode) for x in h2
        ]
    h3 = root.xpath('//div[@class="headsummary"]/h3/a')
    if h3:
        # The first link shaped like "Name (position)" names the series.
        match = None
        for h in h3:
            series = html.tostring(h, method='text', encoding=unicode)
            match = re.search(r'(.+) \((.+)\)', series)
            if match is not None:
                break
        if match is not None:
            mi.series = match.group(1).strip()
            si = first_number(match.group(2))
            # No number inside the parentheses: default to index 1.0.
            mi.series_index = 1.0 if si is None else si
    # Tag scraping is currently disabled:
    #tags = root.xpath('//div[@class="tags"]/span[@class="tag"]/a')
    #if tags:
    #    mi.tags = [html.tostring(x, method='text', encoding=unicode) for x
    #            in tags]
    span = root.xpath(
        '//table[@class="wsltable"]/tr[@class="wslcontent"]/td[4]//span')
    if span:
        raw = html.tostring(span[0], method='text', encoding=unicode)
        rating = first_number(raw)
        # Ratings outside (0, 5] are ignored.
        if rating is not None and 0 < rating <= 5:
            mi.rating = rating
    return mi

示例#6

显示文件

文件： odt.py 项目： Sabesan2000/SOFE-QUAILTY-FINAL

def get_metadata(stream, extract_cover=True):
    """Read metadata from the ``meta.xml`` member of a zipped document.

    :param stream: file-like object opened as a zip archive
    :param extract_cover: forwarded unchanged to ``read_cover``
    :return: a ``MetaInformation`` filled from the standard fields (title,
        creator, description, language, keywords) and, when the user-defined
        ``opf.metadata`` flag is set, from the ``opf.*`` user-defined fields

    Cover reading is attempted unless ``opf.nocover`` is set; any error while
    reading the cover is ignored.
    """
    ws_run = re.compile(r'\s+')

    def normalize(text):
        # Collapse each whitespace run into one space and trim both ends.
        return ws_run.sub(' ', text).strip()

    with ZipFile(stream) as zf:
        root = fromstring(zf.read('meta.xml'))

        def find(field):
            # Normalized text of the first element for *field*; None if absent.
            ns, tag = fields[field]
            hits = root.xpath('//ns0:{}'.format(tag), namespaces={'ns0': ns})
            if not hits:
                return None
            text = tostring(hits[0],
                            method='text',
                            encoding='unicode',
                            with_tail=False)
            return normalize(text).strip()

        mi = MetaInformation(None, [])

        title = find('title')
        if title:
            mi.title = title

        creator = find('initial-creator') or find('creator')
        if creator:
            mi.authors = string_to_authors(creator)

        description = find('description')
        if description:
            mi.comments = description

        lang = find('language')
        if lang and canonicalize_lang(lang):
            mi.languages = [canonicalize_lang(lang)]

        keywords = find('keyword') or find('keywords')
        if keywords:
            pieces = (piece.strip() for piece in keywords.split(','))
            mi.tags = [piece for piece in pieces if piece]

        # Gather the user-defined entries, keyed by lower-cased name.
        data = {}
        user_ns = {'ns0': fields['user-defined'][0]}
        for node in root.xpath('//ns0:user-defined', namespaces=user_ns):
            name = (node.get('{%s}name' % METANS) or '').lower()
            vtype = node.get('{%s}value-type' % METANS) or 'string'
            val = node.text
            if not (name and val):
                continue
            # Boolean entries are stored as real booleans.
            data[name] = (val == 'true') if vtype == 'boolean' else val

        opfmeta = False  # we need this later for the cover
        opfnocover = False
        if data.get('opf.metadata'):
            # custom metadata contains OPF information
            opfmeta = True

            title_sort = data.get('opf.titlesort', '')
            if title_sort:
                mi.title_sort = title_sort

            opf_authors = data.get('opf.authors', '')
            if opf_authors:
                mi.authors = string_to_authors(opf_authors)

            author_sort = data.get('opf.authorsort', '')
            if author_sort:
                mi.author_sort = author_sort

            raw_isbn = data.get('opf.isbn', '')
            if raw_isbn:
                isbn = check_isbn(raw_isbn)
                if isbn is not None:
                    mi.isbn = isbn

            publisher = data.get('opf.publisher', '')
            if publisher:
                mi.publisher = publisher

            pubdate = data.get('opf.pubdate', '')
            if pubdate:
                mi.pubdate = parse_date(pubdate, assume_utc=True)

            identifiers = data.get('opf.identifiers')
            if identifiers:
                try:
                    mi.identifiers = json.loads(identifiers)
                except Exception:
                    pass

            rating = data.get('opf.rating')
            if rating:
                try:
                    # Clamp to the 0-10 range.
                    mi.rating = max(0, min(float(rating), 10))
                except Exception:
                    pass

            series = data.get('opf.series', '')
            if series:
                mi.series = series
                series_index = data.get('opf.seriesindex', '')
                if series_index:
                    try:
                        mi.series_index = float(series_index)
                    except Exception:
                        mi.series_index = 1.0

            opf_lang = data.get('opf.language', '')
            if opf_lang:
                cl = canonicalize_lang(opf_lang)
                if cl:
                    mi.languages = [cl]

            opfnocover = data.get('opf.nocover', False)

        if not opfnocover:
            try:
                read_cover(stream, zf, mi, opfmeta, extract_cover)
            except Exception:
                pass  # Do not let an error reading the cover prevent reading other data

    return mi

示例#7

显示文件

文件： epubsplit_plugin.py 项目： JimmXinu/EpubSplit

    def _do_split(self,
                  db,
                  source_id,
                  misource,
                  splitepub,
                  origlines,
                  newspecs,
                  deftitle=None):

        linenums, changedtocs, checkedalways = newspecs
        # logger.debug("updated tocs:%s"%changedtocs)
        if not self.has_lines(linenums):
            return
        #logger.debug("2:%s"%(time.time()-self.t))
        self.t = time.time()

        #logger.debug("linenums:%s"%linenums)

        defauthors = None

        if not deftitle and prefs['copytoctitle']:
            if linenums[0] in changedtocs:
                deftitle=changedtocs[linenums[0]][0] # already unicoded()'ed
            elif len(origlines[linenums[0]]['toc']) > 0:
                deftitle=unicode(origlines[linenums[0]]['toc'][0])
            #logger.debug("deftitle:%s"%deftitle)

        if not deftitle and prefs['copytitle']:
            deftitle = _("%s Split") % misource.title

        if prefs['copyauthors']:
            defauthors = misource.authors

        mi = MetaInformation(deftitle,defauthors)

        if prefs['copytags']:
            mi.tags = misource.tags # [item for sublist in tagslists for item in sublist]

        if prefs['copylanguages']:
            mi.languages = misource.languages

        if prefs['copyseries']:
            mi.series = misource.series

        if prefs['copydate']:
            mi.timestamp = misource.timestamp

        if prefs['copyrating']:
            mi.rating = misource.rating

        if prefs['copypubdate']:
            mi.pubdate = misource.pubdate

        if prefs['copypublisher']:
            mi.publisher = misource.publisher

        if prefs['copyidentifiers']:
            mi.set_identifiers(misource.get_identifiers())

        if prefs['copycomments'] and misource.comments:
            mi.comments = "<p>"+_("Split from:")+"</p>" + misource.comments

        #logger.debug("mi:%s"%mi)
        book_id = db.create_book_entry(mi,
                                       add_duplicates=True)

        if prefs['copycover'] and misource.has_cover:
            db.set_cover(book_id, db.cover(source_id,index_is_id=True))

        #logger.debug("3:%s"%(time.time()-self.t))
        self.t = time.time()

        custom_columns = self.gui.library_view.model().custom_columns
        for col, action in prefs['custom_cols'].iteritems():
            #logger.debug("col: %s action: %s"%(col,action))

            if col not in custom_columns:
                #logger.debug("%s not an existing column, skipping."%col)
                continue

            coldef = custom_columns[col]
            #logger.debug("coldef:%s"%coldef)
            label = coldef['label']
            value = db.get_custom(source_id, label=label, index_is_id=True)
            if value:
                db.set_custom(book_id,value,label=label,commit=False)

        #logger.debug("3.5:%s"%(time.time()-self.t))
        self.t = time.time()

        if prefs['sourcecol'] != '' and prefs['sourcecol'] in custom_columns \
                and prefs['sourcetemplate']:
            val = SafeFormat().safe_format(prefs['sourcetemplate'], misource, 'EpubSplit Source Template Error', misource)
            #logger.debug("Attempting to set %s to %s"%(prefs['sourcecol'],val))
            label = custom_columns[prefs['sourcecol']]['label']
            if custom_columns[prefs['sourcecol']]['datatype'] == 'series':
                val = val + (" [%s]"%self.book_count)
            db.set_custom(book_id, val, label=label, commit=False)
        self.book_count = self.book_count+1
        db.commit()

        #logger.debug("4:%s"%(time.time()-self.t))
        self.t = time.time()

        self.gui.library_view.model().books_added(1)
        self.gui.library_view.select_rows([book_id])

        #logger.debug("5:%s"%(time.time()-self.t))
        self.t = time.time()

        editconfig_txt = _('You can enable or disable Edit Metadata in Preferences > Plugins > EpubSplit.')
        if prefs['editmetadata']:
            confirm('\n'+_('''The book for the new Split EPUB has been created and default metadata filled in.

However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.

You can fill in the metadata yourself, or use download metadata for known books.

If you download or add a cover image, it will be included in the generated EPUB.''')+'\n\n'+
                    editconfig_txt+'\n',
                    'epubsplit_created_now_edit_again',
                    self.gui)
            self.gui.iactions['Edit Metadata'].edit_metadata(False)

        try:
            QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
            #logger.debug("5:%s"%(time.time()-self.t))
            self.t = time.time()
            self.gui.tags_view.recount()

            self.gui.status_bar.show_message(_('Splitting off from EPUB...'), 60000)

            mi = db.get_metadata(book_id,index_is_id=True)

            outputepub = PersistentTemporaryFile(suffix='.epub')

            coverjpgpath = None
            if mi.has_cover:
                # grab the path to the real image.
                coverjpgpath = os.path.join(db.library_path, db.path(book_id, index_is_id=True), 'cover.jpg')

            outlist = list(set(linenums + checkedalways))
            outlist.sort()
            splitepub.write_split_epub(outputepub,
                                       outlist,
                                       changedtocs=changedtocs,
                                       authoropts=mi.authors,
                                       titleopt=mi.title,
                                       descopt=mi.comments,
                                       tags=mi.tags,
                                       languages=mi.languages,
                                       coverjpgpath=coverjpgpath)

            #logger.debug("6:%s"%(time.time()-self.t))
            self.t = time.time()
            db.add_format_with_hooks(book_id,
                                     'EPUB',
                                     outputepub, index_is_id=True)

            #logger.debug("7:%s"%(time.time()-self.t))
            self.t = time.time()

            self.gui.status_bar.show_message(_('Finished splitting off EPUB.'), 3000)
            self.gui.library_view.model().refresh_ids([book_id])
            self.gui.tags_view.recount()
            current = self.gui.library_view.currentIndex()
            self.gui.library_view.model().current_changed(current, self.previous)
        finally:
            QApplication.restoreOverrideCursor()

        if not prefs['editmetadata']:
            confirm('<p>'+
                    '</p><p>'.join([_('<b><u>%s</u> by %s</b> has been created and default metadata filled in.')%(mi.title,', '.join(mi.authors)),
                                   _('EpubSplit now skips the Edit Metadata step by default.'),
                                   editconfig_txt])+
                    '</p>',
                    'epubsplit_created_now_no_edit_again',
                    self.gui)

示例#8

显示文件

    def _do_split(self,
                  db,
                  source_id,
                  misource,
                  splitepub,
                  newspecs,
                  deftitle=None,
                  editmeta=True):
        """Create a new library book and attach an EPUB split from the source.

        :param db: library database the new book is created in
        :param source_id: id of the source book (cover and custom column
            values are copied from it)
        :param misource: metadata of the source book
        :param splitepub: object providing ``write_split_epub``
        :param newspecs: 2-tuple ``(linenums, changedtocs)``
        :param deftitle: title for the new book; when empty and
            ``prefs['copytitle']`` is set it is derived from the source title
        :param editmeta: currently unused (the edit-metadata step is
            disabled); kept for interface compatibility

        Metadata is copied from ``misource`` as enabled in ``prefs``, the
        book entry is created, cover and custom columns are copied, and the
        split EPUB is written and added to the new book.
        """
        linenums, changedtocs = newspecs

        # self.t is refreshed after each stage -- presumably a checkpoint for
        # timing diagnostics.
        self.t = time.time()

        defauthors = None

        if not deftitle and prefs['copytitle']:
            deftitle = _("نمونه %s") % misource.title

        if prefs['copyauthors']:
            defauthors = misource.authors

        mi = MetaInformation(deftitle, defauthors)

        if prefs['copytags']:
            mi.tags = misource.tags

        if prefs['copylanguages']:
            mi.languages = misource.languages

        if prefs['copyseries']:
            mi.series = misource.series

        if prefs['copydate']:
            mi.timestamp = misource.timestamp

        if prefs['copyrating']:
            mi.rating = misource.rating

        if prefs['copypubdate']:
            mi.pubdate = misource.pubdate

        if prefs['copypublisher']:
            mi.publisher = misource.publisher

        if prefs['copyidentifiers']:
            mi.set_identifiers(misource.get_identifiers())

        if prefs['copycomments'] and misource.comments:
            mi.comments = _("Split from:") + "\n\n" + misource.comments

        book_id = db.create_book_entry(mi,
                                       add_duplicates=True)

        if prefs['copycover'] and misource.has_cover:
            db.set_cover(book_id, db.cover(source_id, index_is_id=True))

        self.t = time.time()

        custom_columns = self.gui.library_view.model().custom_columns
        # Only the column names are needed, so iterate the keys directly
        # (this also avoids the Python 2-only dict.iteritems()).
        for col in prefs['custom_cols']:
            if col not in custom_columns:
                # Configured column does not exist in this library.
                continue

            label = custom_columns[col]['label']
            value = db.get_custom(source_id, label=label, index_is_id=True)
            if value:
                db.set_custom(book_id, value, label=label, commit=False)

        self.t = time.time()

        if prefs['sourcecol'] != '' and prefs['sourcecol'] in custom_columns \
                and prefs['sourcetemplate']:
            val = SafeFormat().safe_format(prefs['sourcetemplate'], misource, 'EpubSplit Source Template Error',
                                           misource)
            label = custom_columns[prefs['sourcecol']]['label']
            db.set_custom(book_id, val, label=label, commit=False)

        # Single commit for all the commit=False custom column writes above.
        db.commit()

        self.t = time.time()

        self.gui.library_view.model().books_added(1)
        self.gui.library_view.select_rows([book_id])

        # NOTE: the confirmation prompt and the Edit Metadata dialog that
        # used to run here when ``editmeta`` was true are disabled.
        self.t = time.time()
        self.gui.tags_view.recount()

        self.gui.status_bar.show_message(_('فایل نمونه ساخته شد'), 60000)

        mi = db.get_metadata(book_id, index_is_id=True)

        outputepub = PersistentTemporaryFile(suffix='.epub')

        # Cover path lookup is disabled, so no cover path is ever passed on.
        coverjpgpath = None

        splitepub.write_split_epub(outputepub,
                                   linenums,
                                   changedtocs=changedtocs,
                                   authoropts=mi.authors,
                                   titleopt=mi.title,
                                   descopt=mi.comments,
                                   tags=mi.tags,
                                   languages=mi.languages,
                                   coverjpgpath=coverjpgpath)

        self.t = time.time()
        db.add_format_with_hooks(book_id,
                                 'EPUB',
                                 outputepub, index_is_id=True)

        self.t = time.time()

        self.gui.status_bar.show_message(_('Finished splitting off EPUB.'), 3000)
        self.gui.library_view.model().refresh_ids([book_id])
        self.gui.tags_view.recount()
        current = self.gui.library_view.currentIndex()
        self.gui.library_view.model().current_changed(current, self.previous)

示例#9

显示文件

    def _do_split(self,
                  db,
                  source_id,
                  misource,
                  splitepub,
                  origlines,
                  newspecs,
                  deftitle=None):
        """Create a new library book from part of a source EPUB.

        A new book entry is created in ``db`` with metadata copied from
        ``misource`` according to the ``copy*`` preferences, the configured
        custom columns are copied over, and then the split EPUB is written
        with ``splitepub.write_split_epub`` and attached to the new book as
        its EPUB format.

        :param db: library database; used here to create the book entry, set
            the cover and custom columns, commit, read metadata back and add
            the EPUB format.
        :param source_id: id of the source book in ``db``.
        :param misource: metadata object of the source book.
        :param splitepub: object providing ``write_split_epub``.
        :param origlines: indexable by line number; each entry has a ``'toc'``
            sequence (presumably the original split lines -- confirm against
            the caller).
        :param newspecs: 3-tuple ``(linenums, changedtocs, checkedalways)``.
            ``changedtocs`` maps a line number to a sequence of TOC titles;
            ``linenums`` and ``checkedalways`` are lists that get merged into
            the final output line list.
        :param deftitle: title for the new book; when falsy a title is derived
            from the TOC or the source title, depending on preferences.
        :return: None. Returns early, doing nothing, when
            ``self.has_lines(linenums)`` is false.
        """

        linenums, changedtocs, checkedalways = newspecs
        # logger.debug("updated tocs:%s"%changedtocs)
        if not self.has_lines(linenums):
            return
        #logger.debug("2:%s"%(time.time()-self.t))
        # self.t is a timing checkpoint; it is only read by the commented-out
        # debug logging statements.
        self.t = time.time()

        #logger.debug("linenums:%s"%linenums)

        defauthors = None

        # Title preference order: explicit deftitle, then the first TOC entry
        # of the first selected line (edited TOC wins over the original), then
        # "<source title> Split".
        if not deftitle and prefs['copytoctitle']:
            if linenums[0] in changedtocs:
                deftitle = changedtocs[linenums[0]][0]  # already unicoded()'ed
            elif len(origlines[linenums[0]]['toc']) > 0:
                deftitle = unicode(origlines[linenums[0]]['toc'][0])
            #logger.debug("deftitle:%s"%deftitle)

        if not deftitle and prefs['copytitle']:
            deftitle = _("%s Split") % misource.title

        if prefs['copyauthors']:
            defauthors = misource.authors

        mi = MetaInformation(deftitle, defauthors)

        # Copy each metadata field from the source only if its preference is
        # enabled.
        if prefs['copytags']:
            mi.tags = misource.tags  # [item for sublist in tagslists for item in sublist]

        if prefs['copylanguages']:
            mi.languages = misource.languages

        if prefs['copyseries']:
            mi.series = misource.series

        if prefs['copydate']:
            mi.timestamp = misource.timestamp

        if prefs['copyrating']:
            mi.rating = misource.rating

        if prefs['copypubdate']:
            mi.pubdate = misource.pubdate

        if prefs['copypublisher']:
            mi.publisher = misource.publisher

        if prefs['copyidentifiers']:
            mi.set_identifiers(misource.get_identifiers())

        # Source comments are prefixed with a "Split from:" paragraph.
        if prefs['copycomments'] and misource.comments:
            mi.comments = "<p>" + _("Split from:") + "</p>" + misource.comments

        #logger.debug("mi:%s"%mi)
        book_id = db.create_book_entry(mi, add_duplicates=True)

        if prefs['copycover'] and misource.has_cover:
            db.set_cover(book_id, db.cover(source_id, index_is_id=True))

        #logger.debug("3:%s"%(time.time()-self.t))
        self.t = time.time()

        # Copy the configured custom columns that exist in this library and
        # have a truthy value on the source book. Writes are batched
        # (commit=False) and committed once below.
        custom_columns = self.gui.library_view.model().custom_columns
        for col, action in six.iteritems(prefs['custom_cols']):
            #logger.debug("col: %s action: %s"%(col,action))

            if col not in custom_columns:
                #logger.debug("%s not an existing column, skipping."%col)
                continue

            coldef = custom_columns[col]
            #logger.debug("coldef:%s"%coldef)
            label = coldef['label']
            value = db.get_custom(source_id, label=label, index_is_id=True)
            if value:
                db.set_custom(book_id, value, label=label, commit=False)

        #logger.debug("3.5:%s"%(time.time()-self.t))
        self.t = time.time()

        # Optionally record where the book came from in a custom "source"
        # column, rendered from a template evaluated against the source
        # metadata.
        if prefs['sourcecol'] != '' and prefs['sourcecol'] in custom_columns \
                and prefs['sourcetemplate']:
            val = SafeFormat().safe_format(prefs['sourcetemplate'], misource,
                                           'EpubSplit Source Template Error',
                                           misource)
            #logger.debug("Attempting to set %s to %s"%(prefs['sourcecol'],val))
            label = custom_columns[prefs['sourcecol']]['label']
            # For series-type columns, append the running book count as the
            # bracketed index.
            if custom_columns[prefs['sourcecol']]['datatype'] == 'series':
                val = val + (" [%s]" % self.book_count)
            db.set_custom(book_id, val, label=label, commit=False)
        # Incremented on every split, whether or not the source column is set.
        self.book_count = self.book_count + 1
        db.commit()

        #logger.debug("4:%s"%(time.time()-self.t))
        self.t = time.time()

        # Make the new book visible and selected in the library view so the
        # Edit Metadata action below operates on it.
        self.gui.library_view.model().books_added(1)
        self.gui.library_view.select_rows([book_id])

        #logger.debug("5:%s"%(time.time()-self.t))
        self.t = time.time()

        editconfig_txt = _(
            'You can enable or disable Edit Metadata in Preferences > Plugins > EpubSplit.'
        )
        # With 'editmetadata' enabled, the user edits metadata before the EPUB
        # is generated; the metadata is re-read from the db afterwards.
        if prefs['editmetadata']:
            confirm(
                '\n' +
                _('''The book for the new Split EPUB has been created and default metadata filled in.

However, the EPUB will *not* be created until after you've reviewed, edited, and closed the metadata dialog that follows.

You can fill in the metadata yourself, or use download metadata for known books.

If you download or add a cover image, it will be included in the generated EPUB.'''
                  ) + '\n\n' + editconfig_txt + '\n',
                'epubsplit_created_now_edit_again', self.gui)
            self.gui.iactions['Edit Metadata'].edit_metadata(False)

        # Show a wait cursor for the duration of the split; the finally clause
        # restores it even if writing fails.
        try:
            QApplication.setOverrideCursor(QCursor(Qt.WaitCursor))
            #logger.debug("5:%s"%(time.time()-self.t))
            self.t = time.time()
            self.gui.tags_view.recount()

            self.gui.status_bar.show_message(_('Splitting off from EPUB...'),
                                             60000)

            # Re-read the metadata from the db: it may have been changed in
            # the Edit Metadata dialog above.
            mi = db.get_metadata(book_id, index_is_id=True)

            outputepub = PersistentTemporaryFile(suffix='.epub')

            coverjpgpath = None
            if mi.has_cover:
                # grab the path to the real image.
                coverjpgpath = os.path.join(db.library_path,
                                            db.path(book_id, index_is_id=True),
                                            'cover.jpg')

            # Output lines are the selected lines plus the "always" lines,
            # de-duplicated and sorted.
            outlist = list(set(linenums + checkedalways))
            outlist.sort()
            splitepub.write_split_epub(outputepub,
                                       outlist,
                                       changedtocs=changedtocs,
                                       authoropts=mi.authors,
                                       titleopt=mi.title,
                                       descopt=mi.comments,
                                       tags=mi.tags,
                                       languages=mi.languages,
                                       coverjpgpath=coverjpgpath)

            #logger.debug("6:%s"%(time.time()-self.t))
            self.t = time.time()
            db.add_format_with_hooks(book_id,
                                     'EPUB',
                                     outputepub,
                                     index_is_id=True)

            #logger.debug("7:%s"%(time.time()-self.t))
            self.t = time.time()

            self.gui.status_bar.show_message(_('Finished splitting off EPUB.'),
                                             3000)
            # Refresh the GUI views so they reflect the new book and format.
            self.gui.library_view.model().refresh_ids([book_id])
            self.gui.tags_view.recount()
            current = self.gui.library_view.currentIndex()
            self.gui.library_view.model().current_changed(
                current, self.previous)
            if self.gui.cover_flow:
                self.gui.cover_flow.dataChanged()
        finally:
            QApplication.restoreOverrideCursor()

        # When the Edit Metadata step was skipped, tell the user what was
        # created and where the setting lives.
        if not prefs['editmetadata']:
            confirm(
                '<p>' + '</p><p>'.join([
                    _('<b><u>%s</u> by %s</b> has been created and default metadata filled in.'
                      ) % (mi.title, ', '.join(mi.authors)),
                    _('EpubSplit now skips the Edit Metadata step by default.'
                      ), editconfig_txt
                ]) + '</p>', 'epubsplit_created_now_no_edit_again', self.gui)

示例#10

显示文件

文件： html.py 项目： Eksmo/calibre

def get_metadata_(src, encoding=None):
    """Build a MetaInformation object from metadata embedded in HTML source.

    Every field is looked up first in an HTML comment of the form
    ``<!-- ... KEY="value" ... -->`` and, when no such comment matches,
    through the patterns returned by ``get_meta_regexp_`` for a list of
    candidate names (first matching name wins). The title additionally falls
    back to the ``<title>`` element. Only the first 150000 characters of the
    source are searched.

    :param src: HTML source. If it is not already ``unicode`` it is decoded
        with ``encoding`` (undecodable bytes replaced) or, when no encoding
        is given, converted with ``xml_to_unicode``.
    :param encoding: optional encoding name used to decode ``src``.
    :return: a ``MetaInformation`` built from the title and author, with
        publisher, isbn, language, pubdate, timestamp, series, series_index,
        rating, comments and tags set for whichever of them were found.

    Values that cannot be parsed (dates, series index, rating) are skipped
    rather than raising.
    """
    if not isinstance(src, unicode):
        if not encoding:
            src = xml_to_unicode(src)[0]
        else:
            src = src.decode(encoding, "replace")

    # Meta data definitions as in
    # http://www.mobileread.com/forums/showpost.php?p=712544&postcount=9

    src = src[:150000]  # Searching shouldn't take too long
    ent_pat = re.compile(r"&(\S+)?;")

    # Title
    title = _lookup_field_(src, "TITLE",
                           ("DC.title", "DCTERMS.title", "Title"), True)
    if not title:
        match = re.search("<title>([^<>]+?)</title>", src, re.IGNORECASE)
        if match:
            title = match.group(1)

    # Author: only the comment form has "," rewritten to ";".
    author = _comment_value_(src, "AUTHOR", True)
    if author is not None:
        author = author.replace(",", ";")
    else:
        author = _meta_value_(
            src,
            ("Author", "DC.creator.aut", "DCTERMS.creator.aut", "DC.creator"))

    # Create MetaInformation with Title and Author
    if title:
        title = ent_pat.sub(entity_to_unicode, title)
    if author:
        author = ent_pat.sub(entity_to_unicode, author)
    mi = MetaInformation(title, [author] if author else None)

    # Publisher
    publisher = _lookup_field_(
        src, "PUBLISHER",
        ("Publisher", "DC.publisher", "DCTERMS.publisher"), True)
    if publisher:
        mi.publisher = ent_pat.sub(entity_to_unicode, publisher)

    # ISBN: keep only digits and the check character X.
    isbn = _lookup_field_(
        src, "ISBN", ("ISBN", "DC.identifier.ISBN", "DCTERMS.identifier.ISBN"))
    if isbn:
        mi.isbn = re.sub(r"[^0-9xX]", "", isbn)

    # LANGUAGE
    language = _lookup_field_(src, "LANGUAGE",
                              ("DC.language", "DCTERMS.language"))
    if language:
        mi.language = language

    # PUBDATE
    pubdate = _lookup_field_(
        src, "PUBDATE",
        (
            "Pubdate",
            "Date of publication",
            "DC.date.published",
            "DC.date.publication",
            "DC.date.issued",
            "DCTERMS.issued",
        ))
    if pubdate:
        try:
            mi.pubdate = parse_date(pubdate)
        except Exception:
            # Best effort: an unparseable date is ignored, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            pass

    # TIMESTAMP
    timestamp = _lookup_field_(
        src, "TIMESTAMP",
        ("Timestamp", "Date of creation", "DC.date.created",
         "DC.date.creation", "DCTERMS.created"))
    if timestamp:
        try:
            mi.timestamp = parse_date(timestamp)
        except Exception:
            pass

    # SERIES, optionally carrying its index inline as "Name [1.5]"
    series = _lookup_field_(src, "SERIES", ("Series",))
    if series:
        series_index = None
        match = re.search(r"\[([.0-9]+)\]", series)
        if match is not None:
            try:
                series_index = float(match.group(1))
            except Exception:
                pass
            # The bracketed part is removed even when it was not a number.
            series = series.replace(match.group(), "").strip()

        mi.series = ent_pat.sub(entity_to_unicode, series)
        if series_index is None:
            # No usable inline index: try the separate "Seriesnumber" entry.
            match = get_meta_regexp_("Seriesnumber").search(src)
            if match:
                try:
                    series_index = float(match.group(1))
                except Exception:
                    pass
        if series_index is not None:
            mi.series_index = series_index

    # RATING
    rating = _lookup_field_(src, "RATING", ("Rating",))
    if rating:
        try:
            value = float(rating)
            if value < 0:
                value = 0
            if value > 5:
                # Values above 5 are halved (presumably a 0-10 scale).
                value /= 2.0
            if value > 5:
                # Still above 5 after halving: discard as out of range.
                value = 0
            mi.rating = value
        except Exception:
            pass

    # COMMENTS
    comments = _lookup_field_(src, "COMMENTS", ("Comments",))
    if comments:
        mi.comments = ent_pat.sub(entity_to_unicode, comments)

    # TAGS: comma separated list
    tags = _lookup_field_(src, "TAGS", ("Tags",))
    if tags:
        mi.tags = [x.strip() for x in ent_pat.sub(entity_to_unicode, tags).split(",")]

    # Ready to return MetaInformation
    return mi


def _comment_value_(src, key, same_quote=False):
    """Return the value of ``KEY=<quote>value<quote>`` inside an HTML comment.

    With ``same_quote`` true the closing quote must be the same character as
    the opening one, so the value may contain the other quote character;
    otherwise the value ends at the first quote of either kind. Returns None
    when no comment matches.
    """
    if same_quote:
        pattern = r'<!--.*?%s=(?P<q>[\'"])(.+?)(?P=q).*?-->' % key
        group = 2
    else:
        pattern = r'<!--.*?%s=[\'"]([^"\']+)[\'"].*?-->' % key
        group = 1
    match = re.search(pattern, src, re.DOTALL)
    if match:
        return match.group(group)
    return None


def _meta_value_(src, names):
    """Return group 1 of the first ``get_meta_regexp_`` pattern that matches.

    ``names`` are tried in order and the search stops at the first name whose
    pattern matches, even if the captured value is empty. Returns None when
    none of the patterns match.
    """
    for name in names:
        match = get_meta_regexp_(name).search(src)
        if match:
            return match.group(1)
    return None


def _lookup_field_(src, key, meta_names, same_quote=False):
    """Look a field up in an HTML comment first, then via ``meta_names``."""
    value = _comment_value_(src, key, same_quote)
    if value is None:
        value = _meta_value_(src, meta_names)
    return value