示例#1
0
 def load(cache, auId, doUrls, expire, trials, sleep, timeout):
     """Load the AU summary (and, if requested, the URL list) for one AU.

     Up to ``trials`` attempts are made.  An ``urllib2.HTTPError`` causes
     ``cache.reconnect(sleep, timeout)`` before the next attempt; an
     ``ExpatError`` is logged and the next attempt follows immediately.
     A ``LockssError`` ends the retries and is logged as a warning.

     cache   -- object providing ``ui`` and ``reconnect(sleep, timeout)``
     auId    -- the AU to load data for
     doUrls  -- when true load through ``UrlReport.loadTry``, otherwise
                through ``LockssCacheAuSummary.__loadTry``
     expire  -- handed unchanged to the loader
     trials  -- maximum number of attempts
     sleep, timeout -- handed unchanged to ``cache.reconnect``

     Returns True if one attempt finished without raising, else False.
     """
     if doUrls:
         act = Action.GETAUSUMMARY + "," + Action.GETURLLIST
     else:
         act = Action.GETAUSUMMARY
     log.info('Start %s: %s expire=%s ...' % (act, auId, str(expire)))
     success = False
     try:
         for _attempt in range(trials):
             try:
                 if doUrls:
                     UrlReport.loadTry(cache.ui, auId, expire)
                 else:
                     LockssCacheAuSummary.__loadTry(cache.ui, auId, expire)
             except urllib2.HTTPError:
                 # connection trouble: re-establish the session, then retry
                 cache.reconnect(sleep, timeout)
             except ExpatError:
                 # malformed XML response: just try again
                 log.error("XML Parser error; could not %s %s" % (auId, act))
             else:
                 success = True
                 break
         else:
             # the loop ran to completion, i.e. no attempt succeeded
             log.error("exhausted trials for %s; could not load %s" % (auId, act))
     except LockssError as err:
         # logged as warnings because the output is scanned for the ERROR string
         log.warn("LockssException: %s" % err)
         log.warn("could not digest %s for %s" % (act, auId.auId))
     finally:
         log.debug2('Stop %s: %s Success = %s ...' % (act, auId, success))
     return success
示例#2
0
 def open_file(self, server, action):
     """Open ``<dir for server>/<action>.tsv`` for writing.

     The directory is obtained from ``self.mkdir(action, server)``.

     Returns the open file object, or None when the directory is not
     available or the file cannot be opened (the latter is logged).
     Callers are expected to test the result before using it.
     """
     dirname = self.mkdir(action, server)
     if not dirname:
         return None
     try:
         return open("%s/%s.tsv" % (dirname, action), 'w')
     except (IOError, OSError):
         # open() never returns a falsy value; failure is signalled by an
         # exception, so this is the only place the error can be reported.
         log.error("Could not open %s/%s" % (dirname, action))
         return None
示例#3
0
 def mkdir(self, action, server):
     """Make sure the output directory ``<options.dir>/<server>`` exists.

     ``action`` is not used; it is kept so existing callers keep working.

     Returns the directory path, or None if it could not be created
     (the failure is logged).
     """
     dirname = "%s/%s" % (self.options.dir, server)
     if os.path.exists(dirname):
         log.debug2("using output directory %s" % dirname)
         return dirname
     try:
         # 0o777 is the octal spelling accepted by Python 2.6+ and Python 3
         os.makedirs(dirname, 0o777)
     except OSError:
         # only file-system failures are handled here; a bare except would
         # also have swallowed KeyboardInterrupt and SystemExit
         log.error("Could not create %s" % dirname)
         return None
     log.info("created output directory %s" % dirname)
     return dirname
示例#4
0
 def process(self):
     """Request a crawl for every matching AU on every configured cache.

     For each entry of ``self.options.cachelist`` the cache is obtained via
     ``self.get_cache`` and the AUs via ``self.collectAuIdInstances``.

     Raises RuntimeError when the number of AUs on a cache exceeds
     ``self.options.max``; no crawl is requested on that cache in this case.
     A LockssError from an individual crawl request is logged and the
     remaining AUs are still processed.
     """
     for server in self.options.cachelist:
         self.cache = self.get_cache(server.domain, server.port,
                                     True, self.options.username, self.options.password)
         auids = self.collectAuIdInstances(self.cache)
         if len(auids) > self.options.max:
             msg = "cowardly refusing to start %d crawls on %s" % (len(auids), self.cache)
             # call form of raise: valid in Python 2 and Python 3
             raise RuntimeError(msg)

         for auId in auids:
             log.info('Request Crawl: %s %s ' % (self.cache, auId))
             try:
                 self.cache.ui.startCrawl(auId.getLockssAu())
                 log.info("started crawl: %s" % auId)
             except LockssError as e:
                 log.error("no crawl for %s (%s)" % (auId, e))
                 
示例#5
0
 def load(cache, trials, sleep, timeout):
     """Load the comm peer status of ``cache``, retrying on HTTP errors.

     Up to ``trials`` attempts are made; after an ``urllib2.HTTPError``
     ``cache.reconnect(sleep, timeout)`` is called before the next attempt.
     Any other exception ends the retries and is logged with its traceback.

     Returns True if one attempt finished without raising, else False.
     """
     act = Action.GETCOMMPEERS
     log.info("Start %s:..." % (act))
     success = False
     try:
         for _attempt in range(trials):
             try:
                 CacheCommPeer.__loadTry(cache.ui, cache)
             except urllib2.HTTPError:
                 cache.reconnect(sleep, timeout)
             else:
                 success = True
                 break
         else:
             # the loop ran to completion, i.e. no attempt succeeded
             log.error("exhausted trials; could not %s %s" % (act, cache))
     except Exception as err:
         log.error("could not digest comm peer status %s (%s)" % (str(err), traceback.format_exc()))
     finally:
         log.debug("Stop %s: Success %s..." % (act, success))
     return success
示例#6
0
    def process(self):
        """Poll crawl status on the first configured cache, then write a report.

        Steps:
          1. Connect to ``options.cachelist[0]``.
          2. Unless ``options.dryrun`` is set, load the auids from the cache.
          3. Collect the matching auids; on a dry run stop here.
          4. Call ``self.crawlstatus`` repeatedly, pausing ``options.pause``
             between rounds, until it returns a false value for the full
             auid list.
          5. Write ``<options.dir>/AuCrawlStatus.tsv``.

        Raises RuntimeError if the auids cannot be loaded from the cache.
        """
        log.info("--- Start Processing")

        options = self.options

        server = options.cachelist[0]
        self.cache = self.get_cache(server.domain, server.port,
                                    True, options.username,
                                    options.password)

        if not options.dryrun:
            success = LockssCacheAuId.load(self.cache)
            if not success:
                log.error('Exiting: could not load auids from cache %s' % (self.cache))
                # call form of raise: valid in Python 2 and Python 3
                raise RuntimeError('could not load auids from cache')

        auids = self.collectAuIdInstances(self.cache)

        if options.dryrun:
            return

        # NOTE(review): crawlstatus() presumably returns the auids that still
        # need another check -- confirm against its definition.
        repeat = auids
        while repeat:
            # do a first round until all auids are found to conform
            # do not retry auids that are found to be ok
            while repeat:
                repeat = self.crawlstatus(repeat)
                if repeat:
                    self.sleep(options.pause)

            # time has passed, so re-check the original auids to see
            # whether they are all still conforming
            log.info("Retrying all auids to recheck")
            repeat = self.crawlstatus(auids)
            if repeat:
                self.sleep(options.pause)

        log.info("No active crawls; printing info about most recent crawls")
        # the with-block closes the file even when printcsv raises
        with open(options.dir + "/AuCrawlStatus.tsv", 'w') as f:
            LockssCrawlStatus.printcsv(f, auids, options.crawlsort, 1, options.crawlheaders, "\t")

        log.info("--- Stop Processing")
示例#7
0
 def load(cache, auId, trials, sleep, timeout):
     """Load the crawl status for one AU, retrying on HTTP errors.

     Up to ``trials`` attempts are made; after an ``urllib2.HTTPError``
     ``cache.reconnect(sleep, timeout)`` is called before the next attempt.
     A ``LockssError`` ends the retries and is logged as a warning.

     Returns True if one attempt finished without raising, else False.
     """
     success = False
     log.debug2('Start %s: %s ...' % (Action.GETCRAWLSTATUS, auId))
     try:
         log.info('get %s: %s ...' % (Action.GETCRAWLSTATUS, auId))
         for _ in range(trials):
             try:
                 LockssCrawlStatus.__loadTry(cache.ui, auId)
                 success = True
                 break
             except urllib2.HTTPError:
                 cache.reconnect(sleep, timeout)
         if not success:
             # reported once, after the last attempt, not on every failed
             # attempt (a later retry may still succeed)
             log.error("exhausted trials for %s, could not load crawlstatus" % (auId))
     except LockssError as inst:
         # logged as warnings because the output is scanned for the ERROR string
         log.warn("LockssException: %s" % inst)
         log.warn("could not digest %s for %s" % (Action.GETCRAWLSTATUS, auId.auId))
     finally:
         log.debug2('Stop %s: %s Success = %s ...' % (Action.GETCRAWLSTATUS, auId, success))
     return success
示例#8
0
 def process_server(self, server):
     '''
     Run the actions listed in options.action against one server.

     On a dry run only the matching auids are collected and nothing else
     is done.  Otherwise the requested GET* actions are executed and the
     requested PRT* actions write tab-separated reports through
     self.open_file.

     Raises RuntimeError if GETAUIDLIST was requested and the auids could
     not be loaded.  A LockssError raised by any step is logged here and
     ends processing of this server.
     '''

     log.info("------ Start Processing %s" % server)
     options = self.options
     try:
         self.cache = self.get_cache(server.domain, server.port,
                                     options.need_credentials, options.username,
                                     options.password)

         if not options.dryrun and Action.GETAUIDLIST in options.action:
             if not LockssCacheAuId.load(self.cache):
                 log.error('Exiting: could not load auids from cache %s' % (self.cache))
                 # call form of raise: valid in Python 2 and Python 3
                 raise RuntimeError('could not load auids from cache')
         auids = self.collectAuIdInstances(self.cache)

         if options.dryrun:
             return

         # Each report file is written inside a with-block so that it is
         # closed even when the corresponding printcsv call raises.
         if Action.PRTAUIDLIST in options.action:
             f = self.open_file(self.cache.name, Action.PRTAUIDLIST)
             # TODO get all auids for server
             if f:
                 with f:
                     LockssCacheAuId.printcsv(f, auids, "\t")

         if Action.GETREPOSSPACE in options.action:
             self.getreposspace()

         if Action.PRTREPOSSPACE in options.action:
             f = self.open_file(self.cache.name, Action.PRTREPOSSPACE)
             if f:
                 with f:
                     RepositorySpace.printcsv(f, [self.cache], "\t")

         # actions below need auids to operate on
         if not auids:
             log.info("no matching auids")
             return

         doUrls = Action.GETURLLIST in options.action

         if Action.GETAUSUMMARY in options.action:
             self.getausummaries(auids, options.dir, doUrls, options.expire, options.noquit)

         if Action.PRTAUSUMMARY in options.action:
             f = self.open_file(self.cache.name, Action.PRTAUSUMMARY)
             if f:
                 with f:
                     LockssCacheAuSummary.printcsv(f, auids, options.ausummarysort, options.ausummaryheaders, "\t")

         if Action.PRTURLLIST in options.action:
             # NOTE(review): the directory is created from self.cache.name but
             # the path handed to printcsv is built from server.name -- confirm
             # that both names are always identical.
             dr = self.mkdir(options.action, self.cache.name)
             if dr:
                 UrlReport.printcsv("%s/%s" % (self.options.dir, server.name),  # dir
                                    auids, options.urlsort, options.urlheaders, '\t', options.urlminversion)

         if Action.GETCRAWLSTATUS in options.action:
             self.getcrawlstatus(auids, options.dir, options.noquit)

         if Action.PRTCRAWLSTATUS in options.action:
             f = self.open_file(self.cache.name, Action.PRTCRAWLSTATUS)
             if f:
                 with f:
                     LockssCrawlStatus.printcsv(f, auids, options.crawlsort, options.ncrawllimit, options.crawlheaders, "\t")

         if Action.GETCOMMPEERS in options.action:
             self.getcommpeers(options.dir, options.noquit)

         if Action.PRTCOMMPEERS in options.action:
             f = self.open_file(self.cache.name, Action.PRTCOMMPEERS)
             if f:
                 # TODO LockssCacheCommPeer.printcsv(f, self.cache)
                 # until then the file is created and left empty
                 f.close()
     except LockssError as e:
         log.error("EXCEPTION %s" % str(e))
     finally:
         log.debug2("------ Stop Processing %s" % server)