Exemplo n.º 1
0
 def startYourEngines(self):
     """Start a Twisted scheduler and register the periodic host-status job.

     A new ``TwistedScheduler`` is created and started, then
     ``self.hoststatus`` is scheduled to run every 10 seconds under the job
     id ``'host_status'`` unless a job with that id is already registered.

     NOTE(review): the scheduler is a local that is not stored anywhere
     visible in this method -- confirm whether the existing-job check can
     ever find a job on a freshly created scheduler.
     """
     scheduler = TwistedScheduler()
     scheduler.start()

     # Guard clause: nothing to do when the job is already registered.
     if scheduler.get_job('host_status'):
         return

     scheduler.add_job(self.hoststatus, 'interval', seconds=10, id='host_status')
Exemplo n.º 2
0
class Spiders(resource.Resource):
    """Web resource showing each spider's working status and schedule.

    GET renders an HTML table with one row per spider of a project
    (status, timestamp of the last state change, next scheduled run and a
    link to its data).  POST (re)schedules every spider of the project as
    an hourly APScheduler interval job.
    """
    config = Config()

    # Project used when the request carries no ``project`` argument.
    DEFAULT_PROJECT = 'careerTalk'

    def __init__(self, root, local_items):
        """Store collaborators and start the job scheduler.

        :param root: object exposing ``poller.queues`` and
            ``launcher.processes`` / ``launcher.finished``.
        :param local_items: stored on the instance; not used by the methods
            of this class.
        """
        resource.Resource.__init__(self)
        self.root = root
        self.local_items = local_items
        # project name -> {spider name -> status record}
        self.spider_status_dic = {}

        logging.basicConfig()
        self.scheduler = TwistedScheduler()
        self.scheduler.start()

    def _project_from_request(self, txrequest):
        """Return the ``project`` request argument or the default project."""
        values = txrequest.args.get('project')
        return values[0] if values else self.DEFAULT_PROJECT

    def get_spider_status(self, project):
        """Return the refreshed ``{spider: status record}`` dict of *project*.

        The per-project dict is created on first use (every spider starts as
        ``'finished'`` with no timestamp or job) and is refreshed on every
        call.
        """
        spider_status = self.spider_status_dic.get(project)
        if not spider_status:
            spider_status = {
                spider: {'status': 'finished', 'timestamp': None,
                         'job': None, 'schedule_job': None}
                for spider in get_spider_list(project)
            }
            self.spider_status_dic[project] = spider_status
        self._update_spider_status(project)
        return spider_status

    def _update_spider_status(self, project):
        """Refresh the cached status records of *project* in place.

        First applies the current crawl state (pending queue entries, then
        running processes, then finished processes), and afterwards attaches
        the APScheduler job and its next run time to every spider.
        """
        spider_status = self.spider_status_dic.get(project)
        if spider_status is None:
            # Nothing cached for this project yet; nothing to refresh.
            return

        def record(spider, state, timestamp, job):
            # Spiders that are not part of this project's status dict are
            # skipped instead of raising KeyError.
            entry = spider_status.get(spider)
            if entry is None:
                return
            entry['status'] = state
            entry['timestamp'] = timestamp
            entry['job'] = job

        def belongs(proc):
            # NOTE(review): assumes launcher process objects carry a
            # ``project`` attribute (as scrapyd's do) -- confirm.  Objects
            # without it are treated as belonging to the requested project.
            return getattr(proc, 'project', project) == project

        # Only the requested project's queue is relevant.  Iterating every
        # queue (and rebinding ``project`` while doing so) would write other
        # projects' spiders into this project's records.
        queue = self.root.poller.queues.get(project)
        if queue is not None:
            for message in queue.list():
                record(message['name'], 'pending', None, message['_job'])

        for proc in self.root.launcher.processes.values():
            if belongs(proc):
                record(proc.spider, 'running', proc.start_time, proc.job)

        for proc in self.root.launcher.finished:
            if belongs(proc):
                record(proc.spider, 'finished', proc.end_time, proc.job)

        # Scheduled jobs are registered under the spider name (see
        # render_POST), so the spider name doubles as the job id.
        for spider, status in spider_status.items():
            sjob = self.scheduler.get_job(spider)
            status['schedule_job'] = sjob
            status['next_time'] = sjob.next_run_time if sjob else None

    def render_GET(self, txrequest):
        """Render the status table of the requested project as HTML."""
        project = self._project_from_request(txrequest)
        spider_status = self.get_spider_status(project)

        parts = ["<table>", "<tr>"]
        for th in ['spider', 'status', 'timestamp', 'next_time', 'data']:
            parts.append("<th>%s</th>" % th)
        parts.append("</tr>")

        for spider, status in spider_status.items():
            parts.append("<tr>")
            parts.append("<td>%s</td><td>%s</td><td>%s</td><td>%s</td>"
                         % (spider, status['status'], status['timestamp'],
                            status['next_time']))
            parts.append("<td><a href='/data/%s/'>data</a></td>" % spider)
            parts.append("</tr>")

        parts.append("</table>")
        parts.append("<form action='' method='post'><input type='submit' value='开启所有任务'></input></form>")
        return "".join(parts)

    def render_POST(self, txrequest):
        """Schedule every spider of the requested project to run hourly.

        Jobs are registered under the spider name (replacing an existing job
        with the same id) and their first runs are staggered 5 seconds apart,
        starting now.
        """
        project = self._project_from_request(txrequest)
        spiders = get_spider_list(project)

        # APScheduler interprets naive datetimes in the scheduler's own
        # timezone, so a naive UTC "now" would be off by the local UTC
        # offset.  Use an aware "now" in the scheduler's timezone instead.
        first_run = dt.datetime.now(self.scheduler.timezone)
        for spider in spiders:
            self.scheduler.add_job(spider_crawl, 'interval', minutes=60,
                                   replace_existing=True, id=spider,
                                   next_run_time=first_run,
                                   args=[project, spider])
            first_run = first_run + dt.timedelta(seconds=5)
        return "<span>任务全部开启</span><a href='/'>返回</a>"