import asyncio
import itertools
from collections import defaultdict
from typing import Dict, List, Optional, Set

import plotly.graph_objects as go
from aioredis import Redis  # assuming an aioredis v1-style async client
from dash.exceptions import PreventUpdate

# Project-local modules (db, kvstore, kvinfo) and models (ApplicationMetrics,
# JobStages) are assumed to be imported from elsewhere in the repo.


async def load_stored_jobs(self) -> Set[int]:
    if self.redis is None:
        self.redis = await db.connect_with_redis()
    key = kvstore.sequential_jobs_key(app_id=self.app_id)
    # Scores carry the job ids; cast to int, since Redis returns scores as
    # floats and the declared return type is Set[int].
    stored_job_ids = {
        int(score)
        for _, score in await self.redis.zrevrangebyscore(key, withscores=True)
    }
    return stored_job_ids
async def _report_metrics(self, fresh_metrics: ApplicationMetrics,
                          execution_timestamp):
    if self.redis is None:
        self.redis = await db.connect_with_redis()

    # Split the snapshot into still-running and completed jobs.
    running_jobs: Dict[str, JobStages] = {}
    completed_jobs: Dict[str, JobStages] = {}
    for job_id, job_data in fresh_metrics.jobs_stages.items():
        if job_data.job.completionTime is None:
            running_jobs[job_id] = job_data
        else:
            completed_jobs[job_id] = job_data

    self.stored_job_ids |= {int(job_id) for job_id in fresh_metrics.jobs_stages}

    # Interleave (score, member) pairs, matching aioredis'
    # zadd(key, score, member, *pairs) argument order.
    args = list(
        itertools.chain.from_iterable(
            (int(job_id), job_data.dump())
            for job_id, job_data in completed_jobs.items()))
    if args:
        await self.redis.zadd(
            kvstore.sequential_jobs_key(app_id=self.app_id), *args)

    # Only still-running jobs stay in the per-application snapshot key.
    value = ApplicationMetrics(
        executor_metrics=fresh_metrics.executor_metrics,
        jobs_stages=running_jobs).dump()
    await self.redis.set(self.app_id, value)

    if self.graphite is None:
        self.graphite = db.connect_with_graphite("loader")
    ts_executor_metric_keys = (
        "totalGCTime",
        "totalShuffleRead",
        "totalShuffleWrite",
        "memoryUsed",
    )
    ts_executor_metrics = {
        f"executors.{self.app_id}.{executor.id}.{k}": getattr(executor, k, 0)
        for executor in fresh_metrics.executor_metrics
        for k in ts_executor_metric_keys
    }
    self.graphite.send_dict(ts_executor_metrics)
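# A minimal sketch (not part of the original code) of the sorted-set layout
# the methods here rely on: member = the serialized JobStages payload,
# score = the job id, so zrevrangebyscore walks jobs newest-first. The helper
# name and the plain-JSON payload are hypothetical, for illustration only.
import json


def _example_zadd_args(completed_jobs):
    """Interleave (score, member) pairs the way _report_metrics builds args."""
    flat = []
    for job_id, payload in completed_jobs.items():
        flat.extend((int(job_id), json.dumps(payload)))
    return flat


# _example_zadd_args({"7": {"stages": {}}}) == [7, '{"stages": {}}']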
def update_state(self, selected_app_info, n_intervals):
    # Refresh only on every other interval tick.
    if not n_intervals or n_intervals % 2 != 0:
        raise PreventUpdate
    if not selected_app_info:
        raise PreventUpdate
    app_id: Optional[str] = selected_app_info["app_id"]
    seq_job_data_raw = kvstore.client.zrevrangebyscore(
        kvinfo.sequential_jobs_key(app_id=app_id),
        min=0,
        max=500000,
        start=0,
        num=30)
    seq_job_data = [JobStages.from_json(x) for x in seq_job_data_raw]
    return self.render_app_info(app_id, selected_app_info.get("environment"),
                                seq_job_data)
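# Hypothetical wiring sketch for update_state (the component ids and the
# `dashboard` instance are assumptions, not from the original code):
# n_intervals is fed by a dcc.Interval, so the modulo guard above fires on
# every second tick.
from dash import Dash, dcc, html
from dash.dependencies import Input, Output, State

app = Dash(__name__)
app.layout = html.Div([
    dcc.Store(id="selected-app-info"),
    dcc.Interval(id="poll", interval=1000),  # ticks every second
    html.Div(id="app-info"),
])

# Dash passes Inputs first, then States.
app.callback(
    Output("app-info", "children"),
    [Input("poll", "n_intervals")],
    [State("selected-app-info", "data")],
)(lambda n, info: dashboard.update_state(info, n))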
async def _app_latest_apply(self, redis: Redis, graphite, app_id):
    # Take the three most recent completed jobs for this application.
    data = await redis.zrevrangebyscore(
        kvstore.sequential_jobs_key(app_id=app_id), count=3, offset=0)
    last_jobs = [JobStages.from_json(d) for d in data]
    for job_data in last_jobs:
        job_group_alias = await self.resolve_job_group(app_id, job_data, redis)
        if job_group_alias is None:
            continue
        job_group_alias = job_group_alias.decode()
        # Report all stages of the job concurrently.
        await asyncio.gather(*[
            self.write_stage_test(
                graphite, app_id, float(self.apply(stage_data)),
                job_data.job.completionTime, job_group_alias)
            for stage_data in job_data.stages.values()
        ])
async def load_jobs(self, redis) -> List[JobStages]:
    reported_jobs: Set[int] = await self.load_reported_jobs(redis)
    # Map job id (the score) -> serialized job; cast scores to int so the
    # set difference against reported_jobs stays type-consistent.
    data = {
        int(score): job
        for job, score in await redis.zrevrangebyscore(
            kvstore.sequential_jobs_key(app_id=self.app_id), withscores=True)
    }
    if not data:
        print(f"{self.processor_id}: No data")
        return []
    print(f"Data: {len(data)} lines")
    # Process only jobs not yet reported, newest `self._batch` of them.
    job_ids_to_process: List[int] = sorted(data.keys() - reported_jobs)[-self._batch:]
    print("Job ids: ", job_ids_to_process)
    return [JobStages.from_json(data[jid]) for jid in job_ids_to_process]
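# Illustration (made-up ids) of the batching rule above: keep only the newest
# `_batch` job ids that are not already in the reported set.
assert sorted({1, 2, 3, 4, 5} - {1, 2})[-2:] == [4, 5]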
def render_executor_task_stats(self, app_id):
    data_raw = kvstore.client.zrevrangebyscore(
        kvinfo.sequential_jobs_key(app_id=app_id),
        min=0,
        max=9999999,
        start=0,
        num=30)
    last_jobs = [JobStages.from_json(x) for x in data_raw]

    executor_times = defaultdict(lambda: defaultdict(lambda: 0))
    for job_stages in last_jobs:
        for stage in job_stages.stages.values():
            for task in stage.tasks.values():
                if "driver" in task.executorId:
                    continue
                key = int(task.executorId)
                try:
                    # NOTE: the divisors assume the payload's unit conventions;
                    # Spark's REST API reports the *CpuTime metrics in
                    # nanoseconds and the other task timers in milliseconds.
                    executors_des_time = task.taskMetrics[
                        "executorDeserializeTime"] / 10**6
                    executors_des_cpu_time = task.taskMetrics[
                        "executorDeserializeCpuTime"] / 10**9
                    executors_run_time = task.taskMetrics[
                        "executorRunTime"] / 10**6
                    executors_cpu_time = task.taskMetrics[
                        "executorCpuTime"] / 10**9
                    java_gc = task.taskMetrics["jvmGcTime"] / 10**6
                except KeyError:
                    # Skip tasks with incomplete metrics.
                    continue
                executor_times["executors_des_cpu_time"][
                    key] += executors_des_cpu_time
                executor_times["executors_des_nocpu_time"][
                    key] += executors_des_time - executors_des_cpu_time
                executor_times["java_gc"][key] += java_gc
                # CPU time net of deserialization and GC.
                executor_times["executors_cpu_time"][
                    key] += executors_cpu_time - executors_des_cpu_time - java_gc
                # Wall-clock run time not covered by the buckets above.
                executor_times["executors_run_time"][key] += (
                    executors_run_time - executors_cpu_time -
                    executors_des_time + executors_des_cpu_time)

    colors = {
        "executors_run_time": "lightgray",
        "executors_cpu_time": "rosybrown",
        "executors_des_nocpu_time": "yellow",
        "executors_des_cpu_time": "orange",
        "java_gc": "lightblue",
    }
    fig = go.Figure()
    for metric_name, data in executor_times.items():
        fig.add_bar(
            x=[*data.keys()],
            y=[*data.values()],
            name=metric_name,
            marker_color=colors[metric_name])
    fig.update_layout(
        barmode="relative",
        title_text="Executor task time distribution",
        transition_duration=500,
    )
    fig.update_yaxes(type="log")
    return fig
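# A small, unit-agnostic check (illustrative only, not from the original code)
# that the five buckets above partition a task's wall-clock run time: their
# sum telescopes back to executorRunTime, which is why barmode="relative"
# stacks to the total time per executor.
def _buckets_sum_to_run_time(des, des_cpu, run, cpu, gc) -> bool:
    buckets = (
        des_cpu,                    # deserialization CPU
        des - des_cpu,              # deserialization, non-CPU share
        gc,                         # JVM GC
        cpu - des_cpu - gc,         # compute CPU net of deserialization/GC
        run - cpu - des + des_cpu,  # remaining wall clock (I/O, waits)
    )
    return abs(sum(buckets) - run) < 1e-9


assert _buckets_sum_to_run_time(des=2.0, des_cpu=1.5, run=30.0, cpu=20.0, gc=3.0)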