def sync_job_status(self, project):
    """Bring local job-execution records in line with each spider service.

    For every service in ``self.spider_service_instances`` the job list of
    ``project.project_name`` is fetched and matched, by service-side job id,
    against the executions returned by ``JobExecution.list_uncomplete_job()``:

    * a job the service reports as running moves a PENDING execution to
      RUNNING and records its start time;
    * a job the service reports as finished moves any not-yet-FINISHED
      execution to FINISHED and records its start and end times.

    Changes are committed after each service has been processed.
    """
    for service in self.spider_service_instances:
        remote_jobs = service.get_job_list(project.project_name)
        # Index the open executions by the id the service assigned to them.
        open_executions = {
            execution.service_job_execution_id: execution
            for execution in JobExecution.list_uncomplete_job()
        }

        # Jobs the service reports as running.
        for info in remote_jobs[SpiderStatus.RUNNING]:
            execution = open_executions.get(info['id'])
            if not execution:
                continue
            if execution.running_status == SpiderStatus.PENDING:
                execution.start_time = info['start_time']
                execution.running_status = SpiderStatus.RUNNING

        # Jobs the service reports as finished.
        for info in remote_jobs[SpiderStatus.FINISHED]:
            execution = open_executions.get(info['id'])
            if not execution:
                continue
            if execution.running_status != SpiderStatus.FINISHED:
                execution.start_time = info['start_time']
                execution.end_time = info['end_time']
                execution.running_status = SpiderStatus.FINISHED

        # Persist the updates gathered from this service.
        db.session.commit()
def start_spider(self, job_instance):
    """Start the spider of ``job_instance`` and record one execution per run.

    ``job_instance.spider_arguments`` is parsed as a comma-separated list of
    ``key=value`` pairs; a key that appears several times accumulates all of
    its values in a list. Blank fragments (for example from a trailing
    comma) are ignored.

    Daemon selection:

    * if a ``daemon`` argument is given, the spider runs on the service whose
      ``server`` equals one of the requested values (when several services
      match, the last one wins; when none matches, nothing is started);
    * otherwise services are drawn at random, with replacement: all-daemons
      many draws for ``JobPriority.HIGHEST``, half as many for
      ``JobPriority.HIGH``, and a single draw for anything else.

    For every selected service the spider is started and a ``JobExecution``
    row is added and committed.

    Raises:
        ValueError: if a non-blank argument fragment contains no ``=``.
    """
    from collections import defaultdict

    project = Project.find_project_by_id(job_instance.project_id)
    spider_name = job_instance.spider_name

    arguments = defaultdict(list)
    if job_instance.spider_arguments:
        for fragment in job_instance.spider_arguments.split(','):
            if not fragment.strip():
                # Tolerate stray separators such as a trailing comma.
                continue
            key, separator, value = fragment.partition('=')
            if not separator:
                raise ValueError(
                    "spider argument %r is not in key=value form" % fragment)
            arguments[key].append(value)

    # Number of random draws, derived from the job priority (at least one).
    daemon_size = len(self.spider_service_instances)
    threshold = 0
    if job_instance.priority == JobPriority.HIGH:
        threshold = daemon_size // 2
    if job_instance.priority == JobPriority.HIGHEST:
        threshold = daemon_size
    threshold = max(threshold, 1)

    candidates = self.spider_service_instances
    leaders = []
    if 'daemon' in arguments:
        # Argument values are lists, so the server has to be looked up with a
        # membership test; comparing it to the list itself never matches.
        requested_servers = arguments['daemon']
        for candidate in candidates:
            if candidate.server in requested_servers:
                leaders = [candidate]
    else:
        # TODO optimize some better func to vote the leader
        leaders = [random.choice(candidates) for _ in range(threshold)]

    for leader in leaders:
        service_job_id = leader.start_spider(
            project.project_name, spider_name, arguments)
        job_execution = JobExecution()
        job_execution.project_id = job_instance.project_id
        job_execution.service_job_execution_id = service_job_id
        job_execution.job_instance_id = job_instance.id
        job_execution.create_time = datetime.datetime.now()
        job_execution.running_on = leader.server
        db.session.add(job_execution)
        # Commit right away so a failure on a later daemon does not lose the
        # record of a job that has already been started.
        db.session.commit()
def job_dashboard(project_id):
    """Render ``job_dashboard.html`` for one project.

    The result of ``JobExecution.list_jobs(project_id)`` is handed to the
    template under the name ``job_status``.
    """
    jobs = JobExecution.list_jobs(project_id)
    return render_template("job_dashboard.html", job_status=jobs)
def project_stats(project_id):
    """Render ``project_stats.html`` for one project.

    The result of ``JobExecution.list_run_stats_by_hours(project_id)`` is
    handed to the template under the name ``run_stats``.
    """
    # The looked-up project is not used below; the call is retained so that
    # whatever the lookup does is preserved.
    # NOTE(review): confirm the lookup has no side effects, then drop it.
    Project.find_project_by_id(project_id)
    stats = JobExecution.list_run_stats_by_hours(project_id)
    return render_template("project_stats.html", run_stats=stats)
def get(self, project_id):
    """Return the job listing of the given project.

    Passes ``project_id`` to ``JobExecution.list_jobs`` and returns its
    result unchanged.
    """
    job_listing = JobExecution.list_jobs(project_id)
    return job_listing