Пример #1
0
    def sync_job_status(self, project):
        """
        Sync job status from the scrapyd servers into the job_execution records.

        For every entry of ``self.spider_service_instances_slave`` the jobs
        reported for ``project.project_name`` are matched against the
        uncompleted JobExecution records, and matching records are advanced
        to RUNNING or FINISHED. The session is committed once per instance.

        :param project: object whose ``project_name`` is used to query each
            spider service instance
        :return: None
        :raises Exception: any error raised by ``db.session.commit()``; the
            session is rolled back before the error is re-raised
        """
        for spider_service_instance in self.spider_service_instances_slave:
            # Jobs of this project as reported by the service, grouped by status.
            # NOTE(review): the original comment gives the shape as
            # {'pending': [], 'running': [], 'finish': []} - confirm against get_job_list.
            job_status = spider_service_instance.get_job_list(project.project_name)
            # Job executions the database still considers uncompleted.
            job_execution_list = JobExecution.list_uncomplete_job()
            # Lookup table {job id: job_execution}; the key is the part of
            # service_job_execution_id after the last '>'.
            job_execution_dict = {
                job_execution.service_job_execution_id.split('>')[-1]: job_execution
                for job_execution in job_execution_list
            }

            # Jobs reported as RUNNING: only records still marked PENDING are advanced.
            for job_execution_info in job_status[SpiderStatus.RUNNING]:
                job_execution = job_execution_dict.get(job_execution_info['id'])
                if job_execution and job_execution.running_status == SpiderStatus.PENDING:
                    job_execution.start_time = job_execution_info['start_time']
                    job_execution.running_status = SpiderStatus.RUNNING

            # Jobs reported as FINISHED: record both timestamps and close the record.
            for job_execution_info in job_status[SpiderStatus.FINISHED]:
                job_execution = job_execution_dict.get(job_execution_info['id'])
                if job_execution and job_execution.running_status != SpiderStatus.FINISHED:
                    job_execution.start_time = job_execution_info['start_time']
                    job_execution.end_time = job_execution_info['end_time']
                    job_execution.running_status = SpiderStatus.FINISHED

            # A failed commit leaves the session in an unusable transaction;
            # roll it back so later users of the session are not affected.
            try:
                db.session.commit()
            except Exception:
                db.session.rollback()
                raise
Пример #2
0
    def sync_job_status(self, project):
        """
        Bring uncompleted JobExecution records in line with the spider services.

        Each entry of ``self.spider_service_instances`` is asked for the job
        list of ``project.project_name``. Matching records are moved to
        RUNNING or FINISHED, and the session is committed once per instance.

        :param project: object providing ``project_name``
        :return: None
        :raises: any commit failure, re-raised after the session is rolled back
        """
        for service in self.spider_service_instances:
            reported = service.get_job_list(project.project_name)
            records_by_job_id = {
                record.service_job_execution_id: record
                for record in JobExecution.list_uncomplete_job()
            }

            # Reported as running: only records still PENDING are advanced.
            for info in reported[SpiderStatus.RUNNING]:
                record = records_by_job_id.get(info['id'])
                if not record:
                    continue
                if record.running_status == SpiderStatus.PENDING:
                    record.start_time = info['start_time']
                    record.running_status = SpiderStatus.RUNNING

            # Reported as finished: store both timestamps and close the record.
            for info in reported[SpiderStatus.FINISHED]:
                record = records_by_job_id.get(info['id'])
                if not record:
                    continue
                if record.running_status != SpiderStatus.FINISHED:
                    record.start_time = info['start_time']
                    record.end_time = info['end_time']
                    record.running_status = SpiderStatus.FINISHED

            # Persist this instance's changes; undo them if the commit fails.
            try:
                db.session.commit()
            except:  # noqa: E722 - catch-all is intentional, the error is re-raised
                db.session.rollback()
                raise
Пример #3
0
    def sync_job_status(self, project):
        """
        Sync job status from the spider services into the JobExecution records.

        For every entry of ``self.spider_service_instances`` the jobs reported
        for ``project.project_name`` are matched, by
        ``service_job_execution_id``, against the uncompleted JobExecution
        records. Matching records are advanced to RUNNING or FINISHED and the
        session is committed once per instance.

        :param project: object whose ``project_name`` is used to query each
            spider service instance
        :return: None
        :raises Exception: any error raised by ``db.session.commit()``; the
            session is rolled back before the error is re-raised
        """
        for spider_service_instance in self.spider_service_instances:
            job_status = spider_service_instance.get_job_list(project.project_name)
            job_execution_list = JobExecution.list_uncomplete_job()
            # Lookup table {service_job_execution_id: job_execution}.
            job_execution_dict = {
                job_execution.service_job_execution_id: job_execution
                for job_execution in job_execution_list
            }

            # Jobs reported as RUNNING: only records still marked PENDING are advanced.
            for job_execution_info in job_status[SpiderStatus.RUNNING]:
                job_execution = job_execution_dict.get(job_execution_info['id'])
                if job_execution and job_execution.running_status == SpiderStatus.PENDING:
                    job_execution.start_time = job_execution_info['start_time']
                    job_execution.running_status = SpiderStatus.RUNNING

            # Jobs reported as FINISHED: record both timestamps and close the record.
            for job_execution_info in job_status[SpiderStatus.FINISHED]:
                job_execution = job_execution_dict.get(job_execution_info['id'])
                if job_execution and job_execution.running_status != SpiderStatus.FINISHED:
                    job_execution.start_time = job_execution_info['start_time']
                    job_execution.end_time = job_execution_info['end_time']
                    job_execution.running_status = SpiderStatus.FINISHED

            # A failed commit leaves the session in an unusable transaction;
            # roll it back so later users of the session are not affected.
            try:
                db.session.commit()
            except Exception:
                db.session.rollback()
                raise