def monitoring_job(self, job, stable_timeout=600):
    """
    Poll a job until its workflow is done or its task stats stop changing.

    On every poll the per-state task counts are pushed to M3 as
    ``total_tasks_by_state`` counters, tagged with the job's labels plus a
    ``task_state`` tag. Polling stops as soon as ``job.is_workflow_done``
    reports completion, or once the task stats have not changed for
    ``stable_timeout`` seconds. When the job completed, ``start_duration``
    and ``complete_duration`` timings (durations multiplied by 1000) are
    emitted, tagged with the job's labels only.

    :param job: job to monitor; must provide ``job_id``, ``get_job_info``,
        ``get_task_stats``, ``get_labels``, ``is_workflow_done``,
        ``get_start_time`` and ``get_completion_time``.
    :param stable_timeout: seconds the task stats may stay unchanged
        before monitoring gives up.
    :return: ``(completed, start_du, completion_du)``. ``(False, 0, 0)`` is
        returned when ``job`` or its ``job_id`` is falsy, when no poll
        could be made, or when the job did not complete. Durations are
        whatever ``job.get_start_time`` / ``job.get_completion_time``
        return (documented by the original author as seconds).
    rtype: bool, int, int
    """
    if not job or not job.job_id:
        return False, 0, 0

    tags = {}
    job_info = None
    previous_stats = None  # task stats seen on the previous poll, if any
    polled = False
    stable_timestamp = datetime.datetime.now()
    create_time = datetime.datetime.now()
    stable_window = datetime.timedelta(seconds=stable_timeout)

    while datetime.datetime.now() - stable_timestamp < stable_window:
        polled = True
        job_info = job.get_job_info()
        task_stats = job.get_task_stats(job_info)

        # Prepare M3 tags and push data to M3.
        for label in job.get_labels(job_info):
            tags[label.key] = label.value
        # .items() works on both Python 2 and 3 (.iteritems() is 2-only).
        for state_name, task_num in task_stats.items():
            # Build a fresh dict per metric: the shared label tags must not
            # pick up a "task_state" entry, otherwise the timing metrics
            # below would be tagged with whichever state came last.
            self.m3_client.count(
                key="total_tasks_by_state",
                n=task_num,
                tags=dict(tags, task_state=state_name),
            )

        if job.is_workflow_done(job_info):
            break

        # Only the previous snapshot is needed to detect a change, so no
        # history list is kept. The first poll always counts as a change.
        if previous_stats is None or DeepDiff(task_stats, previous_stats):
            stable_timestamp = datetime.datetime.now()
        previous_stats = task_stats

        time.sleep(10)

    if not polled:
        # stable_timeout <= 0: the loop never ran, there is no job info.
        return False, 0, 0

    # Kept as an identity check: only an explicit False means "not done".
    if job.is_workflow_done(job_info) is False:
        return False, 0, 0

    completion_du = job.get_completion_time(job_info, create_time)
    start_du = job.get_start_time(job_info, create_time)

    self.m3_client.timing("start_duration", start_du * 1000, tags)
    self.m3_client.timing("complete_duration", completion_du * 1000, tags)

    return True, start_du, completion_du
def get_job_info(self, job):
    """Return whatever ``job.get_job_info()`` reports for the given job."""
    job_info = job.get_job_info()
    return job_info