Example #1
0
 async def load_stored_jobs(self) -> Set[int]:
     """Load the ids of jobs already persisted in Redis for this app.

     Job ids are stored as the *scores* of the sorted-set entries under the
     app's sequential-jobs key; the members (job payloads) are ignored here.

     Returns:
         Set[int]: the stored job ids.
     """
     if self.redis is None:
         self.redis = await db.connect_with_redis()
     key = kvstore.sequential_jobs_key(app_id=self.app_id)
     # Redis returns scores as floats; cast to int so the result actually
     # matches the declared Set[int] and the integer id sets built when
     # reporting metrics.
     stored_job_ids = {
         int(score)
         for _, score in await self.redis.zrevrangebyscore(key,
                                                           withscores=True)
     }
     return stored_job_ids
Example #2
0
    async def _report_metrics(self, fresh_metrics: ApplicationMetrics,
                              execution_timestamp):
        """Persist a fresh metrics snapshot to Redis and Graphite.

        Completed jobs are appended to the app's sequential-jobs sorted set
        (score = job id, member = serialized job data); still-running jobs
        are written back to the per-app key so the next poll re-examines
        them. Selected executor counters are pushed to Graphite as time
        series.

        Args:
            fresh_metrics: latest jobs/executors snapshot for this app.
            execution_timestamp: timestamp of this poll (unused here; kept
                for interface compatibility).
        """
        if self.redis is None:
            self.redis = await db.connect_with_redis()

        # Split jobs by completion: a job without completionTime is still
        # running and must not be archived yet.
        running_jobs: Dict[str, JobStages] = {}
        completed_jobs: Dict[str, JobStages] = {}
        for job_id, job_data in fresh_metrics.jobs_stages.items():
            if job_data.job.completionTime is None:
                running_jobs[job_id] = job_data
            else:
                completed_jobs[job_id] = job_data

        # Remember every job id seen for this app (only keys are needed).
        self.stored_job_ids |= {
            int(job_id) for job_id in fresh_metrics.jobs_stages
        }

        # Flatten to ZADD's (score, member, score, member, ...) argument
        # shape: score is the integer job id, member the serialized job.
        args = list(
            itertools.chain.from_iterable(
                (int(job_id), job_data.dump())
                for job_id, job_data in completed_jobs.items()))
        if args:
            # The original wrapped this in `try/except Exception: raise`,
            # which swallowed nothing and added nothing — removed.
            await self.redis.zadd(
                kvstore.sequential_jobs_key(app_id=self.app_id), *args)

        # Overwrite the per-app key with only the still-running jobs.
        value = ApplicationMetrics(
            executor_metrics=fresh_metrics.executor_metrics,
            jobs_stages=running_jobs).dump()
        await self.redis.set(self.app_id, value)

        if self.graphite is None:
            self.graphite = db.connect_with_graphtie("loader")
        ts_executor_metric_keys = (
            "totalGCTime",
            "totalShuffleRead",
            "totalShuffleWrite",
            "memoryUsed",
        )
        # One Graphite series per (executor, metric); missing attributes
        # default to 0 so a partial executor record does not break the push.
        ts_executor_metrics = {
            f"executors.{self.app_id}.{executor.id}.{k}":
            getattr(executor, k, 0)
            for executor in fresh_metrics.executor_metrics
            for k in ts_executor_metric_keys
        }
        self.graphite.send_dict(ts_executor_metrics)
Example #3
0
 def update_state(selected_app_info, n_intervals):
     """Interval-driven callback: re-render info for the selected app.

     Args:
         selected_app_info: mapping describing the selected application;
             read for "app_id" and, optionally, "environment".
         n_intervals: tick counter; the body only runs on even, non-zero
             ticks.

     Raises:
         PreventUpdate: when throttled or when no app is selected.
     """
     # Throttle: only refresh on every second tick.
     if not n_intervals or not n_intervals % 2 == 0:
         raise PreventUpdate
     if not selected_app_info:
         raise PreventUpdate
     # NOTE(review): original annotation was Optional[ApplicationMetrics],
     # but the value comes from selected_app_info["app_id"] and is passed as
     # a key — presumably a string id; verify against the caller.
     app_id: Optional[str] = selected_app_info["app_id"]
     seq_job_data_raw = kvstore.client.zrevrangebyscore(
         kvinfo.sequential_jobs_key(app_id=app_id), min=0, max=500000, num=30, start=0
     )
     seq_job_data = [JobStages.from_json(x) for x in seq_job_data_raw]
     # NOTE(review): `self` is referenced although this is not visibly a
     # method — this raises NameError unless the function is defined inside
     # a scope that binds `self`. Confirm the enclosing scope.
     return self.render_app_info(app_id, selected_app_info.get("environment"), seq_job_data)
Example #4
0
 async def _app_latest_apply(self, redis: Redis, graphite, app_id):
     """Apply the scoring function to the latest jobs of one application.

     Fetches the three most recent jobs from the app's sequential-jobs
     sorted set, resolves each job's group alias, and writes one test
     metric per stage to Graphite, with all stage writes of a job issued
     concurrently.

     Args:
         redis: async Redis connection.
         graphite: Graphite client handle.
         app_id: application identifier whose jobs are processed.
     """
     data = await redis.zrevrangebyscore(
         kvstore.sequential_jobs_key(app_id=app_id), count=3, offset=0)
     last_jobs = [JobStages.from_json(d) for d in data]
     for job_data in last_jobs:
         job_group_alias = await self.resolve_job_group(
             app_id, job_data, redis)
         # Jobs without a resolvable group cannot be attributed — skip.
         if job_group_alias is None:
             continue
         job_group_alias = job_group_alias.decode()
         # Stage ids are not needed here, only the stage payloads.
         await asyncio.gather(*[
             self.write_stage_test(
                 graphite, app_id, float(self.apply(stage_data)),
                 job_data.job.completionTime, job_group_alias)
             for stage_data in job_data.stages.values()
         ])
Example #5
0
    async def load_jobs(self, redis) -> List[JobStages]:
        """Fetch the next batch of unreported jobs for this application.

        Reads the whole sequential-jobs sorted set for the app (member =
        serialized job, score = job id), drops ids that were already
        reported, and returns the most recent ``self._batch`` jobs,
        deserialized, in ascending id order.
        """
        reported_jobs: Set[int] = await self.load_reported_jobs(redis)

        raw_entries = await redis.zrevrangebyscore(
            kvstore.sequential_jobs_key(app_id=self.app_id),
            withscores=True)
        # Index payloads by their score (the job id).
        data = {job_id: payload for payload, job_id in raw_entries}

        if not data:
            print(f"{self.processor_id}: No data")
            return []
        print(f"Data: {len(data)} lines")

        pending = data.keys() - reported_jobs
        job_ids_to_process: List[int] = sorted(pending)[-self._batch:]
        print("Job ids: ", job_ids_to_process)

        return [
            JobStages.from_json(data[job_id])
            for job_id in job_ids_to_process
        ]
Example #6
0
    def render_executor_task_stats(self, app_id):
        """Render a stacked bar chart of per-executor task time breakdown.

        Loads the 30 most recent jobs for *app_id* from the sequential-jobs
        sorted set and accumulates, per executor, how task time splits into
        deserialization (cpu / non-cpu), JVM GC, pure cpu time and the
        remaining run-time overhead.

        Args:
            app_id: application identifier to render stats for.

        Returns:
            A plotly figure with one relative bar trace per metric and a
            logarithmic y-axis.
        """
        data_raw = kvstore.client.zrevrangebyscore(
            kvinfo.sequential_jobs_key(app_id=app_id),
            min=0,
            max=9999999,
            start=0,
            num=30)
        last_jobs = [JobStages.from_json(x) for x in data_raw]

        # metric name -> executor id -> accumulated time
        executor_times = defaultdict(lambda: defaultdict(lambda: 0))

        # Divisors normalizing raw task counters; the *CpuTime counters come
        # in a finer-grained unit than the wall-clock ones — TODO confirm
        # the source units against the metrics producer.
        time_div = 10**6
        cpu_div = 10**9

        for job_stages in last_jobs:
            for stage in job_stages.stages.values():
                for task in stage.tasks.values():
                    # The driver is not a numbered executor — skip it.
                    if "driver" in task.executorId:
                        continue
                    key = int(task.executorId)
                    metrics = task.taskMetrics
                    try:
                        des_time = metrics["executorDeserializeTime"] / time_div
                        des_cpu_time = metrics["executorDeserializeCpuTime"] / cpu_div
                        run_time = metrics["executorRunTime"] / time_div
                        cpu_time = metrics["executorCpuTime"] / cpu_div
                        java_gc = metrics["jvmGcTime"] / time_div
                    except KeyError:
                        # A task missing any counter is skipped whole; in the
                        # original all reads also preceded all accumulations,
                        # so hoisting the += out of the try is equivalent.
                        continue

                    executor_times["executors_des_cpu_time"][key] += des_cpu_time
                    executor_times["executors_des_nocpu_time"][
                        key] += des_time - des_cpu_time
                    executor_times["java_gc"][key] += java_gc
                    executor_times["executors_cpu_time"][
                        key] += cpu_time - des_cpu_time - java_gc
                    executor_times["executors_run_time"][
                        key] += run_time - cpu_time - des_time + des_cpu_time

        colors = {
            "executors_run_time": "lightgray",
            "executors_cpu_time": "rosybrown",
            "executors_des_nocpu_time": "yellow",
            "executors_des_cpu_time": "orange",
            "java_gc": "lightblue",
        }

        fig = go.Figure()
        for metric_name, data in executor_times.items():
            fig.add_bar(x=[*data.keys()],
                        y=[*data.values()],
                        name=metric_name,
                        marker_color=colors[metric_name])
        # NOTE(review): title says "Memory distribution" but the chart shows
        # a task-time breakdown — looks copy-pasted; confirm before renaming.
        fig.update_layout(
            barmode="relative",
            title_text="Memory distribution",
            transition_duration=500,
        )
        fig.update_yaxes(type="log")
        return fig