Exemplo n.º 1
0
 def run(self):
     """Poll the pending-request count and start the spider when appropriate.

     Each pass reads ``getCrawlNoRssRequestLength()`` and logs it.

     * Nothing pending: ``runSpider()`` is called only if ``self.runNum`` is
       at least 1, and the loop ends either way.
     * Something pending: ``initSpider()`` is called; once ``self.runNum``
       reaches ``self.size`` the spider is run and the loop ends, otherwise
       the loop polls again immediately.
     """
     status_fmt = "*****************size:%s********runNum:%s********"
     while True:
         pending = getCrawlNoRssRequestLength()
         logging.info("********need deal request num : %s " % pending)

         if pending:
             # presumably initSpider() advances self.runNum -- TODO confirm
             self.initSpider()
             batch_ready = self.runNum >= self.size
             if not batch_ready:
                 continue
             logging.info(status_fmt % (self.size, self.runNum))
             self.runSpider()
             break

         # Queue is empty: flush whatever has been prepared so far, then stop.
         if self.runNum >= 1:
             logging.info(status_fmt % (self.size, self.runNum))
             self.runSpider()
         break
Exemplo n.º 2
0
 def run(self):
     """Loop until the spider has been launched or there is nothing to do.

     The pending-request count comes from ``getCrawlNoRssRequestLength()``
     and is logged on every pass. With no pending requests the loop always
     stops, running the spider first when ``self.runNum >= 1``. With pending
     requests ``initSpider()`` is called and the spider is run (ending the
     loop) as soon as ``self.runNum >= self.size``.
     """
     finished = False
     while not finished:
         request_count = getCrawlNoRssRequestLength()
         logging.info("********need deal request num : %s " % request_count)

         if not request_count:
             # Empty queue: this is the last pass regardless of runNum.
             launch = self.runNum >= 1
             finished = True
         else:
             self.initSpider()
             # Keep polling until runNum has reached the configured size.
             launch = self.runNum >= self.size
             finished = launch

         if launch:
             summary = (self.size, self.runNum)
             logging.info(
                 "*****************size:%s********runNum:%s********" % summary)
             self.runSpider()
Exemplo n.º 3
0
def startScript():
    """Run the main polling loop forever, spawning the spider when work exists.

    Each iteration reads the pending-request count from
    ``getCrawlNoRssRequestLength()`` and logs it. When the count is falsy the
    loop sleeps for ``MAIN_LOOP_SLEEP_TIME`` seconds; otherwise it runs
    ``python runSpider.py`` through ``os.system`` and waits for that command
    to finish before polling again.

    Any ``Exception`` raised inside an iteration is logged together with its
    traceback and followed by the same sleep, so a persistent failure cannot
    turn the loop into a busy spin. The function never returns normally.
    """
    # NOTE: a periodic "sync crawl infos" step (SyncCrawlInfos().index(),
    # gated by RUN_SYNC_INTERVAL_TIMES / RUN_SYNC_INTERVAL_TIME) was previously
    # sketched here as commented-out code and is currently disabled.
    while True:
        try:
            num = getCrawlNoRssRequestLength()
            logging.info("**********need deal request num :%s************" % num)

            if not num:
                logging.info("**********sleep:%s************" % MAIN_LOOP_SLEEP_TIME)
                time.sleep(MAIN_LOOP_SLEEP_TIME)
            else:
                os.system('python runSpider.py')

        except Exception as e:
            # logging.exception records the traceback along with the message.
            logging.exception("--------------%s------------" % e)
            # Back off before retrying instead of looping at full speed.
            time.sleep(MAIN_LOOP_SLEEP_TIME)