Exemplo n.º 1
0
    def monitoring_job(self, job, stable_timeout=600):
        """
        Poll a job until its workflow is done or its task stats stop changing.

        On every poll the per-state task counts are pushed to M3 through
        ``self.m3_client.count``. Polling stops as soon as
        ``job.is_workflow_done`` reports completion, or once the task stats
        have stayed identical for ``stable_timeout`` seconds. When the
        workflow finished, the start and completion durations are also
        pushed through ``self.m3_client.timing`` (multiplied by 1000).

        Args:
            job: object exposing ``job_id``, ``get_job_info``,
                ``get_task_stats``, ``get_labels``, ``is_workflow_done``,
                ``get_start_time`` and ``get_completion_time``.
            stable_timeout: seconds the task stats may remain unchanged
                before monitoring gives up. A value <= 0 means no poll is
                performed and the failure tuple is returned.

        Returns:
            tuple: ``(done, start_duration, completion_duration)``.
            ``(False, 0, 0)`` when ``job`` is missing, has no ``job_id``, or
            did not finish within the stable window; otherwise ``True``
            followed by the values returned by ``job.get_start_time`` and
            ``job.get_completion_time``.
        """
        if not job or not job.job_id:
            return False, 0, 0

        tags = {}
        # Only the previous sample is needed to detect a change, so keep a
        # single reference instead of an ever-growing history list.
        previous_stats = None
        workflow_done = False
        stable_timestamp = datetime.datetime.now()
        create_time = datetime.datetime.now()
        stable_window = datetime.timedelta(seconds=stable_timeout)

        while datetime.datetime.now() - stable_timestamp < stable_window:
            job_info = job.get_job_info()
            task_stats = job.get_task_stats(job_info)

            # Prepare M3 tags and push data to M3
            for label in job.get_labels(job_info):
                tags[label.key] = label.value

            # .items() works on both Python 2 and 3 (iteritems is Py2-only).
            for state_name, task_num in task_stats.items():
                # NOTE(review): "task_state" stays in ``tags`` after this
                # loop, so the timing metrics below carry the last state
                # seen. Preserved as-is; confirm whether that is intended.
                tags["task_state"] = state_name
                self.m3_client.count(
                    key="total_tasks_by_state", n=task_num, tags=tags
                )

            if job.is_workflow_done(job_info):
                workflow_done = True
                break

            if previous_stats is None or DeepDiff(task_stats, previous_stats):
                # First sample, or new record is different from previous:
                # restart the stability window.
                stable_timestamp = datetime.datetime.now()
            previous_stats = task_stats
            time.sleep(10)

        # Covers both "never polled" (non-positive timeout) and "stats went
        # stable before the workflow finished".
        if not workflow_done:
            return False, 0, 0

        completion_du = job.get_completion_time(job_info, create_time)
        start_du = job.get_start_time(job_info, create_time)
        self.m3_client.timing("start_duration", start_du * 1000, tags)
        self.m3_client.timing("complete_duration", completion_du * 1000, tags)

        return True, start_du, completion_du
Exemplo n.º 2
0
 def get_job_info(self, job):
     """Return whatever ``job.get_job_info()`` yields for the given job."""
     info = job.get_job_info()
     return info