Exemplos de get_word_count em Python

Linguagem de programação: Python

Espaço para nome / nome do pacote: calibre_plugins.fanficfare_plugin.wordcount

Método / Função: get_word_count

Exemplos em hotexamples.com: 2

get_word_count em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de calibre_plugins.fanficfare_plugin.wordcount.get_word_count em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Exemplo n.º 1

0

Exibir arquivo

Arquivo: jobs.py Projeto: hseg/FanFicFare

def do_download_for_worker(book, options, merge, notification=lambda x, y: x): ''' Child job, to download story when run as a worker job ''' from calibre_plugins.fanficfare_plugin import FanFicFareBase fffbase = FanFicFareBase(options['plugin_path']) with fffbase: # so the sys.path was modified while loading the # plug impl. from calibre_plugins.fanficfare_plugin.dialogs import ( NotGoingToDownload, OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, CALIBREONLYSAVECOL) from calibre_plugins.fanficfare_plugin.fanficfare import adapters, writers, exceptions from calibre_plugins.fanficfare_plugin.fanficfare.epubutils import get_update_data from calibre_plugins.fanficfare_plugin.fff_util import ( get_fff_adapter, get_fff_config) try: ## No need to download at all. Can happen now due to ## collision moving into book for CALIBREONLY changing to ## ADDNEW when story URL not in library. if book['collision'] in (CALIBREONLY, CALIBREONLYSAVECOL): logger.info("Skipping CALIBREONLY 'update' down inside worker") return book book['comment'] = _('Download started...') configuration = get_fff_config(book['url'], options['fileform'], options['personal.ini']) if configuration.getConfig('use_ssl_unverified_context'): ## monkey patch to avoid SSL bug. dupliated from ## fff_plugin.py because bg jobs run in own process ## space. import ssl if hasattr(ssl, '_create_unverified_context'): ssl._create_default_https_context = ssl._create_unverified_context if not options[ 'updateepubcover'] and 'epub_for_update' in book and book[ 'collision'] in (UPDATE, UPDATEALWAYS): configuration.set("overrides", "never_make_cover", "true") # images only for epub, html, even if the user mistakenly # turned it on else where. if options['fileform'] not in ("epub", "html"): configuration.set("overrides", "include_images", "false") adapter = adapters.getAdapter(configuration, book['url']) adapter.is_adult = book['is_adult'] adapter.username = book['username'] adapter.password = book['password'] adapter.setChaptersRange(book['begin'], book['end']) configuration.load_cookiejar(options['cookiejarfile']) #logger.debug("cookiejar:%s"%configuration.cookiejar) configuration.set_pagecache(options['pagecache']) story = adapter.getStoryMetadataOnly() if not story.getMetadata("series") and 'calibre_series' in book: adapter.setSeries(book['calibre_series'][0], book['calibre_series'][1]) # set PI version instead of default. if 'version' in options: story.setMetadata('version', options['version']) book['title'] = story.getMetadata("title", removeallentities=True) book['author_sort'] = book['author'] = story.getList( "author", removeallentities=True) book['publisher'] = story.getMetadata("publisher") book['url'] = story.getMetadata("storyUrl", removeallentities=True) book['tags'] = story.getSubjectTags(removeallentities=True) book['comments'] = story.get_sanitized_description() book['series'] = story.getMetadata("series", removeallentities=True) if story.getMetadataRaw('datePublished'): book['pubdate'] = story.getMetadataRaw( 'datePublished').replace(tzinfo=local_tz) if story.getMetadataRaw('dateUpdated'): book['updatedate'] = story.getMetadataRaw( 'dateUpdated').replace(tzinfo=local_tz) if story.getMetadataRaw('dateCreated'): book['timestamp'] = story.getMetadataRaw( 'dateCreated').replace(tzinfo=local_tz) else: book['timestamp'] = datetime.now().replace( tzinfo=local_tz) # need *something* there for calibre. writer = writers.getWriter(options['fileform'], configuration, adapter) outfile = book['outfile'] ## checks were done earlier, it's new or not dup or newer--just write it. if book['collision'] in (ADDNEW, SKIP, OVERWRITE, OVERWRITEALWAYS) or \ ('epub_for_update' not in book and book['collision'] in (UPDATE, UPDATEALWAYS)): # preserve logfile even on overwrite. if 'epub_for_update' in book: adapter.logfile = get_update_data( book['epub_for_update'])[6] # change the existing entries id to notid so # write_epub writes a whole new set to indicate overwrite. if adapter.logfile: adapter.logfile = adapter.logfile.replace( "span id", "span notid") if book['collision'] == OVERWRITE and 'fileupdated' in book: lastupdated = story.getMetadataRaw('dateUpdated') fileupdated = book['fileupdated'] # updated doesn't have time (or is midnight), use dates only. # updated does have time, use full timestamps. if (lastupdated.time() == time.min and fileupdated.date() > lastupdated.date()) or \ (lastupdated.time() != time.min and fileupdated > lastupdated): raise NotGoingToDownload( _("Not Overwriting, web site is not newer."), 'edit-undo.png', showerror=False) logger.info("write to %s" % outfile) inject_cal_cols(book, story, configuration) writer.writeStory(outfilename=outfile, forceOverwrite=True) book['comment'] = _('Download %s completed, %s chapters.') % ( options['fileform'], story.getMetadata("numChapters")) book['all_metadata'] = story.getAllMetadata( removeallentities=True) if options['savemetacol'] != '': book['savemetacol'] = story.dump_html_metadata() ## checks were done earlier, just update it. elif 'epub_for_update' in book and book['collision'] in ( UPDATE, UPDATEALWAYS): # update now handled by pre-populating the old images and # chapters in the adapter rather than merging epubs. #urlchaptercount = int(story.getMetadata('numChapters').replace(',','')) # returns int adjusted for start-end range. urlchaptercount = story.getChapterCount() (url, chaptercount, adapter.oldchapters, adapter.oldimgs, adapter.oldcover, adapter.calibrebookmark, adapter.logfile, adapter.oldchaptersmap, adapter.oldchaptersdata) = get_update_data( book['epub_for_update'])[0:9] # dup handling from fff_plugin needed for anthology updates. if book['collision'] == UPDATE: if chaptercount == urlchaptercount: if merge: book['comment'] = _( "Already contains %d chapters. Reuse as is." ) % chaptercount book['all_metadata'] = story.getAllMetadata( removeallentities=True) if options['savemetacol'] != '': book['savemetacol'] = story.dump_html_metadata( ) book['outfile'] = book[ 'epub_for_update'] # for anthology merge ops. return book else: # not merge, raise NotGoingToDownload( _("Already contains %d chapters.") % chaptercount, 'edit-undo.png', showerror=False) elif chaptercount > urlchaptercount: raise NotGoingToDownload( _("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update." ) % (chaptercount, urlchaptercount), 'dialog_error.png') elif chaptercount == 0: raise NotGoingToDownload( _("FanFicFare doesn't recognize chapters in existing epub, epub is probably from a different source. Use Overwrite to force update." ), 'dialog_error.png') if not (book['collision'] == UPDATEALWAYS and chaptercount == urlchaptercount) \ and adapter.getConfig("do_update_hook"): chaptercount = adapter.hookForUpdates(chaptercount) logger.info("Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount)) logger.info("write to %s" % outfile) inject_cal_cols(book, story, configuration) writer.writeStory(outfilename=outfile, forceOverwrite=True) book['comment'] = _('Update %s completed, added %s chapters for %s total.')%\ (options['fileform'],(urlchaptercount-chaptercount),urlchaptercount) book['all_metadata'] = story.getAllMetadata( removeallentities=True) if options['savemetacol'] != '': book['savemetacol'] = story.dump_html_metadata() else: ## Shouldn't ever get here, but hey, it happened once ## before with prefs['collision'] raise Exception( "Impossible state reached -- Book: %s:\nOptions:%s:" % (book, options)) if options['do_wordcount'] == SAVE_YES or ( options['do_wordcount'] == SAVE_YES_UNLESS_SITE and not story.getMetadataRaw('numWords')): wordcount = get_word_count(outfile) logger.info("get_word_count:%s" % wordcount) story.setMetadata('numWords', wordcount) writer.writeStory(outfilename=outfile, forceOverwrite=True) book['all_metadata'] = story.getAllMetadata( removeallentities=True) if options['savemetacol'] != '': book['savemetacol'] = story.dump_html_metadata() if options['smarten_punctuation'] and options['fileform'] == "epub" \ and calibre_version >= (0, 9, 39): # for smarten punc from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS from calibre.utils.logging import Log from collections import namedtuple # do smarten_punctuation from calibre's polish feature data = {'smarten_punctuation': True} opts = ALL_OPTS.copy() opts.update(data) O = namedtuple('Options', ' '.join(six.iterkeys(ALL_OPTS))) opts = O(**opts) log = Log(level=Log.DEBUG) polish({outfile: outfile}, opts, log, logger.info) except NotGoingToDownload as d: book['good'] = False book['status'] = _('Bad') book['showerror'] = d.showerror book['comment'] = unicode(d) book['icon'] = d.icon except Exception as e: book['good'] = False book['status'] = _('Error') book['comment'] = unicode(e) book['icon'] = 'dialog_error.png' book['status'] = _('Error') logger.info("Exception: %s:%s" % (book, book['comment']), exc_info=True) #time.sleep(10) return book

Exemplo n.º 2

0

Exibir arquivo

Arquivo: jobs.py Projeto: Zweibach/FanFicFare

def do_download_for_worker(book,options,merge,notification=lambda x,y:x): ''' Child job, to download story when run as a worker job ''' from calibre_plugins.fanficfare_plugin import FanFicFareBase fffbase = FanFicFareBase(options['plugin_path']) with fffbase: from calibre_plugins.fanficfare_plugin.dialogs import (NotGoingToDownload, OVERWRITE, OVERWRITEALWAYS, UPDATE, UPDATEALWAYS, ADDNEW, SKIP, CALIBREONLY, CALIBREONLYSAVECOL) from calibre_plugins.fanficfare_plugin.fanficfare import adapters, writers, exceptions from calibre_plugins.fanficfare_plugin.fanficfare.epubutils import get_update_data from calibre_plugins.fanficfare_plugin.fff_util import (get_fff_adapter, get_fff_config) try: book['comment'] = _('Download started...') configuration = get_fff_config(book['url'], options['fileform'], options['personal.ini']) if configuration.getConfig('use_ssl_unverified_context'): ## monkey patch to avoid SSL bug. dupliated from ## fff_plugin.py because bg jobs run in own process ## space. import ssl if hasattr(ssl, '_create_unverified_context'): ssl._create_default_https_context = ssl._create_unverified_context if not options['updateepubcover'] and 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS): configuration.set("overrides","never_make_cover","true") # images only for epub, html, even if the user mistakenly # turned it on else where. if options['fileform'] not in ("epub","html"): configuration.set("overrides","include_images","false") adapter = adapters.getAdapter(configuration,book['url']) adapter.is_adult = book['is_adult'] adapter.username = book['username'] adapter.password = book['password'] adapter.setChaptersRange(book['begin'],book['end']) adapter.load_cookiejar(options['cookiejarfile']) #logger.debug("cookiejar:%s"%adapter.cookiejar) adapter.set_pagecache(options['pagecache']) story = adapter.getStoryMetadataOnly() if 'calibre_series' in book: adapter.setSeries(book['calibre_series'][0],book['calibre_series'][1]) # set PI version instead of default. if 'version' in options: story.setMetadata('version',options['version']) book['title'] = story.getMetadata("title", removeallentities=True) book['author_sort'] = book['author'] = story.getList("author", removeallentities=True) book['publisher'] = story.getMetadata("site") book['url'] = story.getMetadata("storyUrl") book['tags'] = story.getSubjectTags(removeallentities=True) if story.getMetadata("description"): book['comments'] = sanitize_comments_html(story.getMetadata("description")) else: book['comments']='' book['series'] = story.getMetadata("series", removeallentities=True) if story.getMetadataRaw('datePublished'): book['pubdate'] = story.getMetadataRaw('datePublished').replace(tzinfo=local_tz) if story.getMetadataRaw('dateUpdated'): book['updatedate'] = story.getMetadataRaw('dateUpdated').replace(tzinfo=local_tz) if story.getMetadataRaw('dateCreated'): book['timestamp'] = story.getMetadataRaw('dateCreated').replace(tzinfo=local_tz) else: book['timestamp'] = None # need *something* there for calibre. writer = writers.getWriter(options['fileform'],configuration,adapter) outfile = book['outfile'] ## No need to download at all. Shouldn't ever get down here. if options['collision'] in (CALIBREONLY, CALIBREONLYSAVECOL): logger.info("Skipping CALIBREONLY 'update' down inside worker--this shouldn't be happening...") book['comment'] = 'Metadata collected.' book['all_metadata'] = story.getAllMetadata(removeallentities=True) if options['savemetacol'] != '': book['savemetacol'] = story.dump_html_metadata() ## checks were done earlier, it's new or not dup or newer--just write it. elif options['collision'] in (ADDNEW, SKIP, OVERWRITE, OVERWRITEALWAYS) or \ ('epub_for_update' not in book and options['collision'] in (UPDATE, UPDATEALWAYS)): # preserve logfile even on overwrite. if 'epub_for_update' in book: adapter.logfile = get_update_data(book['epub_for_update'])[6] # change the existing entries id to notid so # write_epub writes a whole new set to indicate overwrite. if adapter.logfile: adapter.logfile = adapter.logfile.replace("span id","span notid") if options['collision'] == OVERWRITE and 'fileupdated' in book: lastupdated=story.getMetadataRaw('dateUpdated') fileupdated=book['fileupdated'] # updated doesn't have time (or is midnight), use dates only. # updated does have time, use full timestamps. if (lastupdated.time() == time.min and fileupdated.date() > lastupdated.date()) or \ (lastupdated.time() != time.min and fileupdated > lastupdated): raise NotGoingToDownload(_("Not Overwriting, web site is not newer."),'edit-undo.png') logger.info("write to %s"%outfile) inject_cal_cols(book,story,configuration) writer.writeStory(outfilename=outfile, forceOverwrite=True) book['comment'] = 'Download %s completed, %s chapters.'%(options['fileform'],story.getMetadata("numChapters")) book['all_metadata'] = story.getAllMetadata(removeallentities=True) if options['savemetacol'] != '': book['savemetacol'] = story.dump_html_metadata() ## checks were done earlier, just update it. elif 'epub_for_update' in book and options['collision'] in (UPDATE, UPDATEALWAYS): # update now handled by pre-populating the old images and # chapters in the adapter rather than merging epubs. urlchaptercount = int(story.getMetadata('numChapters').replace(',','')) (url, chaptercount, adapter.oldchapters, adapter.oldimgs, adapter.oldcover, adapter.calibrebookmark, adapter.logfile, adapter.oldchaptersmap, adapter.oldchaptersdata) = get_update_data(book['epub_for_update'])[0:9] # dup handling from fff_plugin needed for anthology updates. if options['collision'] == UPDATE: if chaptercount == urlchaptercount: if merge: book['comment']=_("Already contains %d chapters. Reuse as is.")%chaptercount book['all_metadata'] = story.getAllMetadata(removeallentities=True) if options['savemetacol'] != '': book['savemetacol'] = story.dump_html_metadata() book['outfile'] = book['epub_for_update'] # for anthology merge ops. return book else: # not merge, raise NotGoingToDownload(_("Already contains %d chapters.")%chaptercount,'edit-undo.png') elif chaptercount > urlchaptercount: raise NotGoingToDownload(_("Existing epub contains %d chapters, web site only has %d. Use Overwrite to force update.") % (chaptercount,urlchaptercount),'dialog_error.png') elif chaptercount == 0: raise NotGoingToDownload(_("FanFicFare doesn't recognize chapters in existing epub, epub is probably from a different source. Use Overwrite to force update."),'dialog_error.png') if not (options['collision'] == UPDATEALWAYS and chaptercount == urlchaptercount) \ and adapter.getConfig("do_update_hook"): chaptercount = adapter.hookForUpdates(chaptercount) logger.info("Do update - epub(%d) vs url(%d)" % (chaptercount, urlchaptercount)) logger.info("write to %s"%outfile) inject_cal_cols(book,story,configuration) writer.writeStory(outfilename=outfile, forceOverwrite=True) book['comment'] = _('Update %s completed, added %s chapters for %s total.')%\ (options['fileform'],(urlchaptercount-chaptercount),urlchaptercount) book['all_metadata'] = story.getAllMetadata(removeallentities=True) if options['savemetacol'] != '': book['savemetacol'] = story.dump_html_metadata() if options['do_wordcount'] == SAVE_YES or ( options['do_wordcount'] == SAVE_YES_UNLESS_SITE and not story.getMetadataRaw('numWords') ): wordcount = get_word_count(outfile) logger.info("get_word_count:%s"%wordcount) story.setMetadata('numWords',wordcount) writer.writeStory(outfilename=outfile, forceOverwrite=True) book['all_metadata'] = story.getAllMetadata(removeallentities=True) if options['savemetacol'] != '': book['savemetacol'] = story.dump_html_metadata() if options['smarten_punctuation'] and options['fileform'] == "epub" \ and calibre_version >= (0, 9, 39): # for smarten punc from calibre.ebooks.oeb.polish.main import polish, ALL_OPTS from calibre.utils.logging import Log from collections import namedtuple # do smarten_punctuation from calibre's polish feature data = {'smarten_punctuation':True} opts = ALL_OPTS.copy() opts.update(data) O = namedtuple('Options', ' '.join(ALL_OPTS.iterkeys())) opts = O(**opts) log = Log(level=Log.DEBUG) polish({outfile:outfile}, opts, log, logger.info) except NotGoingToDownload as d: book['good']=False book['comment']=unicode(d) book['icon'] = d.icon except Exception as e: book['good']=False book['comment']=unicode(e) book['icon']='dialog_error.png' book['status'] = 'Error' logger.info("Exception: %s:%s"%(book,unicode(e))) traceback.print_exc() #time.sleep(10) return book