Exemplo n.º 1
0
    def depile(self):
        """Send the best queued crawl job to ScrapyD when it has no pending job.

        Written as a generator for inline-callback style use: every ``yield``
        waits on an asynchronous result and ``returnD(None)`` ends the run
        early (presumably wrapped by an inlineCallbacks-like decorator that
        is outside this view -- confirm against the enclosing class).

        Steps:
          1. Initialise the queue on first use and stop if it is empty.
          2. Ask ScrapyD for its status and stop if jobs are already pending.
          3. Pick the job whose corpus has the fewest entries in ``status``
             (missing corpora count as 0), oldest timestamp first.
          4. Schedule it; on error refresh its timestamp so it is retried
             later, otherwise record the scheduling in the database and
             remove the job from the queue.
        """
        if self.queue is None:
            yield self.init_queue()
        if not self.queue:
            returnD(None)

        status = yield self.get_scrapyd_status()
        if status["pending"] > 0:
            returnD(None)
        # Add some random wait to allow possible concurrent Hyphe instance
        # to compete for ScrapyD's empty slots
        yield deferredSleep(1./randint(4,20))
        # The queue is shared state and may have been emptied while waiting.
        if not self.queue:
            returnD(None)

        # Order jobs by corpus with less currently running crawls then age.
        # A tuple key compares both criteria numerically; gluing them into a
        # "count.timestamp" float mis-sorts timestamps of different lengths
        # and loses precision on long values.
        job_id, job = min(
            self.queue.items(),
            key=lambda item: (status.get(item[1]["corpus"], 0),
                              item[1]["timestamp"]))
        res = yield self.send_scrapy_query('schedule', job["crawl_arguments"])
        ts = now_ts()
        if is_error(res):
            logger.msg("WARNING: error sending job %s to ScrapyD: %s" % (job, res))
            # The job may have left the queue during the query; only
            # reschedule it if it is still there.
            if job_id in self.queue:
                self.queue[job_id]['timestamp'] = ts    # let it retry a bit later
        else:
            yield self.db.update_job(job["corpus"], job_id, res['jobid'], ts)
            yield self.db.add_log(job["corpus"], job_id, "CRAWL_SCHEDULED", ts)
            # Tolerate the job having been removed while the DB calls ran.
            self.queue.pop(job_id, None)
Exemplo n.º 2
0
 def stop(self, now=False):
     """Stop the traph: halt the monitor, close the protocol, clean the PID.

     Generator meant for inline-callback style use (it yields while
     waiting and calls ``returnD`` to exit early).

     now -- when true, do not wait for ``self.call_running`` to clear
            before shutting down.

     Does nothing more than stopping the monitor when ``self.stopping()``
     already reports true. The status ends as "stopped" unless
     ``self.error`` is set, in which case it is left as "error".
     """
     if self.monitor.running:
         self.monitor.stop()
     if self.stopping():
         returnD(None)

     if self.error:
         self.status = "error"
     else:
         self.status = "stopping"

     # Unless an immediate stop was requested, poll until the call in
     # progress has finished.
     if not now:
         while self.call_running:
             yield deferredSleep(0.1)

     if self.transport:
         self.protocol.stop()
         self.transport = None
     self.log("Traph stopped")

     # self.error is read again: it may have changed during the wait.
     if not self.error:
         self.status = "stopped"
     self.checkAndRemovePID()
Exemplo n.º 3
0
 def stop(self, now=False):
     """Stop the traph: halt the monitor, close the protocol, clean the PID.

     Generator meant for inline-callback style use (it yields while
     waiting and calls ``returnD`` to exit early).

     now -- when true, do not wait for ``self.call_running`` to clear
            before shutting down.

     Does nothing more than stopping the monitor when ``self.stopping()``
     already reports true. The status ends as "stopped" unless
     ``self.error`` is set, in which case it is left as "error".
     """
     if self.monitor.running:
         self.monitor.stop()
     if self.stopping():
         returnD(None)
     self.status = "error" if self.error else "stopping"
     # Unless an immediate stop was requested, poll until the call in
     # progress has finished.
     while not now and self.call_running:
         yield deferredSleep(0.1)
     if self.transport:
         self.protocol.stop()
         self.transport = None
     self.log("Traph stopped")
     if not self.error:
         self.status = "stopped"
     # PID cleanup must run on every stop, not only after an error;
     # otherwise a clean shutdown skips it entirely.
     self.checkAndRemovePID()