def load(cache, auId, doUrls, expire, trials, sleep, timeout):
    act = Action.GETAUSUMMARY
    if (doUrls):
        act = act + "," + Action.GETURLLIST
    log.info('Start %s: %s expire=%s ...' % (act, auId, str(expire)))
    success = False
    try:
        for _ in range(trials):
            try:
                if (doUrls):
                    UrlReport.loadTry(cache.ui, auId, expire)
                else:
                    LockssCacheAuSummary.__loadTry(cache.ui, auId, expire)
                success = True
                break
            except urllib2.HTTPError as inst:
                cache.reconnect(sleep, timeout)
            except ExpatError:
                log.error("XML Parser error; could not %s %s" % (act, auId))
                success = False  # try again
        if (not success):
            log.error("exhausted trials for %s; could not load %s" % (auId, act))
    except LockssError as inst:
        log.warn("LockssException: %s" % inst)
        # output is scanned for the ERROR string
        log.warn("could not digest %s for %s" % (act, auId.auId))
    finally:
        log.debug2('Stop %s: %s Success = %s ...' % (act, auId, success))
    return success
def open_file(self, server, action):
    # open() raises IOError on failure rather than returning None,
    # so report the problem via an exception handler
    f = None
    dirname = self.mkdir(action, server)
    if (dirname):
        try:
            f = open("%s/%s.tsv" % (dirname, action), 'w')
        except IOError:
            log.error("Could not open %s/%s" % (dirname, action))
    return f
def mkdir(self, action, server):
    dirname = "%s/%s" % (self.options.dir, server)
    if not os.path.exists(dirname):
        try:
            os.makedirs(dirname, 0777)
            log.info("created output directory %s" % dirname)
        except OSError:
            log.error("Could not create %s" % dirname)
            return None
    else:
        log.debug2("using output directory %s" % dirname)
    return dirname
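# A minimal usage sketch for the two helpers above: open a per-server TSV
# report and write the auid list into it. It mirrors the PRTAUIDLIST branch
# of process_server() below; the method name _print_auid_report and the
# choice of Action.PRTAUIDLIST are illustrative, not part of the original
# classes.
def _print_auid_report(self, server_name, auids):
    f = self.open_file(server_name, Action.PRTAUIDLIST)
    if (f):
        LockssCacheAuId.printcsv(f, auids, "\t")
        f.close()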
def process(self):
    for server in self.options.cachelist:
        self.cache = self.get_cache(server.domain, server.port, True,
                                    self.options.username, self.options.password)
        auids = self.collectAuIdInstances(self.cache)
        if (len(auids) > self.options.max):
            msg = "cowardly refusing to start %d crawls on %s" % (len(auids), self.cache)
            raise RuntimeError(msg)
        for auId in auids:
            log.info('Request Crawl: %s %s ' % (self.cache, auId))
            try:
                self.cache.ui.startCrawl(auId.getLockssAu())
                log.info("started crawl: %s" % auId)
            except LockssError as e:
                log.error("no crawl for %s (%s)" % (auId, e))
def load(cache, trials, sleep, timeout):
    act = Action.GETCOMMPEERS
    log.info("Start %s:..." % (act))
    success = False
    try:
        for _ in range(trials):
            try:
                CacheCommPeer.__loadTry(cache.ui, cache)
                success = True
                break
            except urllib2.HTTPError as inst:
                cache.reconnect(sleep, timeout)
        if not success:
            log.error("exhausted trials; could not %s %s" % (act, cache))
    except Exception as inst:
        log.error("could not digest comm peer status %s (%s)" % (str(inst), traceback.format_exc()))
    finally:
        log.debug("Stop %s: Success %s..." % (act, success))
    return success
def process(self):
    log.info("--- Start Processing")
    options = self.options
    server = self.options.cachelist[0]
    self.cache = self.get_cache(server.domain, server.port, True,
                                options.username, options.password)
    if (not self.options.dryrun):
        success = LockssCacheAuId.load(self.cache)
        if (not success):
            log.error('Exiting: could not load auids from cache %s' % (self.cache))
            raise RuntimeError('could not load auids from cache')
    auids = self.collectAuIdInstances(self.cache)
    if (self.options.dryrun):
        return
    repeat = auids
    while (repeat):
        # do a first round until all auids are found to conform;
        # do not retry auids that are found to be ok
        while (repeat):
            repeat = self.crawlstatus(repeat)
            if (repeat):
                self.sleep(options.pause)
        # time has expired; try the original auids again to see
        # whether they are all still conforming
        log.info("Retrying all auids to recheck")
        repeat = self.crawlstatus(auids)
        if (repeat):
            self.sleep(options.pause)
    log.info("No active crawls; printing info about most recent crawls")
    f = open(options.dir + "/AuCrawlStatus.tsv", 'w')
    LockssCrawlStatus.printcsv(f, auids, options.crawlsort, 1, options.crawlheaders, "\t")
    f.close()
    log.info("--- Stop Processing")
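# A condensed sketch of the polling loop in process() above, assuming (as
# used there) that crawlstatus() returns the subset of auids whose crawls are
# still pending; the helper name _wait_for_crawls and the single-round shape
# (no final recheck of all auids) are illustrative only.
def _wait_for_crawls(self, auids, pause):
    pending = auids
    while (pending):
        pending = self.crawlstatus(pending)
        if (pending):
            self.sleep(pause)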
def load(cache, auId, trials, sleep, timeout):
    success = False
    log.debug2('Start %s: %s ...' % (Action.GETCRAWLSTATUS, auId))
    try:
        log.info('get %s: %s ...' % (Action.GETCRAWLSTATUS, auId))
        for i in range(trials):
            try:
                LockssCrawlStatus.__loadTry(cache.ui, auId)
                success = True
                break
            except urllib2.HTTPError as inst:
                cache.reconnect(sleep, timeout)
        if (not success):
            log.error("exhausted trials for %s; could not load crawlstatus" % (auId))
    except LockssError as inst:
        log.warn("LockssException: %s" % inst)
        # output is scanned for the ERROR string
        log.warn("could not digest %s for %s" % (Action.GETCRAWLSTATUS, auId.auId))
    finally:
        log.debug2('Stop %s: %s Success = %s ...' % (Action.GETCRAWLSTATUS, auId, success))
    return success
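# The three load() functions above share the same retry-with-reconnect
# pattern. A minimal sketch of that pattern as a standalone helper, assuming
# only the cache.reconnect(sleep, timeout) call used above and the urllib2
# import the loaders already rely on; the name _retry_with_reconnect and the
# fetch callable are illustrative, not part of the original classes.
def _retry_with_reconnect(cache, fetch, trials, sleep, timeout):
    for _ in range(trials):
        try:
            fetch()  # whatever loader call the caller wraps, e.g. UrlReport.loadTry
            return True
        except urllib2.HTTPError:
            cache.reconnect(sleep, timeout)
    return False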
def process_server(self, server):
    '''
    if dryrun collect matching auids and log.info them
    otherwise perform all requested actions
    '''
    log.info("------ Start Processing %s" % server)
    options = self.options
    try:
        self.cache = self.get_cache(server.domain, server.port,
                                    options.need_credentials,
                                    options.username, options.password)
        if (not options.dryrun):
            if (Action.GETAUIDLIST in options.action):
                success = LockssCacheAuId.load(self.cache)
                if (not success):
                    log.error('Exiting: could not load auids from cache %s' % (self.cache))
                    raise RuntimeError('could not load auids from cache')
        auids = self.collectAuIdInstances(self.cache)
        if (options.dryrun):
            return
        if (Action.PRTAUIDLIST in options.action):
            f = self.open_file(self.cache.name, Action.PRTAUIDLIST)
            # TODO get all auids for server
            if (f):
                LockssCacheAuId.printcsv(f, auids, "\t")
                f.close()
        if (Action.GETREPOSSPACE in options.action):
            self.getreposspace()
        if (Action.PRTREPOSSPACE in options.action):
            f = self.open_file(self.cache.name, Action.PRTREPOSSPACE)
            if (f):
                RepositorySpace.printcsv(f, [self.cache], "\t")
                f.close()
        # the actions below need auids to operate on
        if (not auids):
            log.info("no matching auids")
            return
        doUrls = Action.GETURLLIST in options.action
        success = None
        if (Action.GETAUSUMMARY in options.action):
            self.getausummaries(auids, options.dir, doUrls, options.expire, options.noquit)
        if (Action.PRTAUSUMMARY in options.action):
            f = self.open_file(self.cache.name, Action.PRTAUSUMMARY)
            if (f):
                LockssCacheAuSummary.printcsv(f, auids, options.ausummarysort,
                                              options.ausummaryheaders, "\t")
                f.close()
        if (Action.PRTURLLIST in options.action):
            dr = self.mkdir(options.action, self.cache.name)
            if (dr):
                UrlReport.printcsv("%s/%s" % (self.options.dir, server.name),  # dir
                                   auids, options.urlsort, options.urlheaders,
                                   '\t', options.urlminversion)
        if (Action.GETCRAWLSTATUS in options.action):
            self.getcrawlstatus(auids, options.dir, options.noquit)
        if (Action.PRTCRAWLSTATUS in options.action):
            f = self.open_file(self.cache.name, Action.PRTCRAWLSTATUS)
            if (f):
                LockssCrawlStatus.printcsv(f, auids, options.crawlsort,
                                           options.ncrawllimit, options.crawlheaders, "\t")
                f.close()
        if (Action.GETCOMMPEERS in options.action):
            self.getcommpeers(options.dir, options.noquit)
        if (Action.PRTCOMMPEERS in options.action):
            f = self.open_file(self.cache.name, Action.PRTCOMMPEERS)
            if (f):
                # TODO
                LockssCacheCommPeer.printcsv(f, self.cache)
                f.close()
    except LockssError as e:
        log.error("EXCEPTION %s" % str(e))
    finally:
        log.debug2("------ Stop Processing %s" % server)
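# A minimal driver sketch tying process_server() to the options.cachelist
# structure already used by process() above; the method name
# process_all_servers is illustrative, and per-server failures are assumed to
# be handled inside process_server() as shown.
def process_all_servers(self):
    for server in self.options.cachelist:
        self.process_server(server)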