# Aggregates Swarming task state into ts_mon metrics. Module-level names
# (TARGET_FIELDS, _bucketer, extract_job_fields and the jobs_* metrics) are
# defined elsewhere in the surrounding file.
from collections import defaultdict

import gae_ts_mon

from server import task_result


def _set_jobs_metrics(now):
  """Sets the jobs/* metrics for all pending and running tasks.

  Meant to run as an ndb tasklet: the bare yield below hands a Future back
  to the ndb event loop while the next query page is fetched.
  """
  state_map = {task_result.State.RUNNING: 'running',
               task_result.State.PENDING: 'pending'}
  query_iter = task_result.get_result_summaries_query(
      None, None, 'created_ts', 'pending_running', None).iter()
  jobs_counts = defaultdict(lambda: 0)
  jobs_pending_distributions = defaultdict(
      lambda: gae_ts_mon.Distribution(_bucketer))
  jobs_max_pending_durations = defaultdict(lambda: 0.0)
  while (yield query_iter.has_next_async()):
    summary = query_iter.next()
    status = state_map.get(summary.state, '')
    fields = extract_job_fields(summary.tags)
    target_fields = dict(TARGET_FIELDS)
    if summary.bot_id:
      target_fields['hostname'] = 'autogen:' + summary.bot_id
    if summary.bot_id and status == 'running':
      jobs_running.set(True, target_fields=target_fields, fields=fields)
    fields['status'] = status
    # One aggregation key per distinct combination of fields (incl. status).
    key = tuple(sorted(fields.iteritems()))
    jobs_counts[key] += 1
    pending_duration = summary.pending_now(now)
    if pending_duration is not None:
      jobs_pending_distributions[key].add(pending_duration.total_seconds())
      jobs_max_pending_durations[key] = max(
          jobs_max_pending_durations[key], pending_duration.total_seconds())
  for key, count in jobs_counts.iteritems():
    jobs_active.set(count, target_fields=TARGET_FIELDS, fields=dict(key))
  for key, distribution in jobs_pending_distributions.iteritems():
    jobs_pending_durations.set(
        distribution, target_fields=TARGET_FIELDS, fields=dict(key))
  for key, val in jobs_max_pending_durations.iteritems():
    jobs_max_pending_duration.set(
        val, target_fields=TARGET_FIELDS, fields=dict(key))
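
# Illustrative sketch (not part of the original module): how a ts_mon
# Distribution accumulates samples before being flushed with metric.set().
# The growth factor below is an assumption, not the module's real _bucketer.
_example_dist = gae_ts_mon.Distribution(
    gae_ts_mon.GeometricBucketer(growth_factor=10**0.04))
for _seconds in (0.5, 2.0, 30.0, 120.0):
  _example_dist.add(_seconds)
assert _example_dist.count == 4 and _example_dist.sum == 152.5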

# Buildbucket scheduling-latency metric. model, utils (components.utils) and
# GLOBAL_TARGET_FIELDS come from the surrounding buildbucket module.
def set_build_latency(metric_sec, bucket, must_be_never_leased):
  """Sets |metric_sec| to the distribution of scheduling latencies in |bucket|."""
  q = model.Build.query(
      model.Build.bucket == bucket,
      model.Build.status == model.BuildStatus.SCHEDULED,
  )
  if must_be_never_leased:
    q = q.filter(model.Build.never_leased == True)
  else:
    # Reuse the index that has never_leased.
    q = q.filter(model.Build.never_leased.IN((True, False)))

  now = utils.utcnow()
  dist = gae_ts_mon.Distribution(gae_ts_mon.GeometricBucketer())
  for e in q.iter(projection=[model.Build.create_time]):
    latency = (now - e.create_time).total_seconds()
    dist.add(latency)
  if dist.count == 0:
    dist.add(0)  # Report a zero-valued distribution rather than no data point.
  metric_sec.set(dist, {'bucket': bucket}, target_fields=GLOBAL_TARGET_FIELDS)
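
# Hypothetical caller sketch: the metric objects and bucket names below are
# illustrative stand-ins, not the project's actual configuration.
for bucket_name in ('luci.chromium.try', 'luci.chromium.ci'):
  set_build_latency(scheduling_latency_never_leased_sec, bucket_name, True)
  set_build_latency(scheduling_latency_sec, bucket_name, False)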

# Sharded variant of _set_jobs_metrics: processes at most _JOBS_PER_SHARD
# result summaries per task queue task, re-enqueueing itself with a cursor
# when it runs out of budget. A sketch of the _ShardParams helper follows
# this function.
import logging

from components import utils


def _set_jobs_metrics(payload):
  """Sets the jobs/* metrics for one shard of the result summaries query."""
  params = _ShardParams(payload)

  state_map = {task_result.State.RUNNING: 'running',
               task_result.State.PENDING: 'pending'}
  jobs_counts = defaultdict(lambda: 0)
  jobs_total = 0
  jobs_pending_distributions = defaultdict(
      lambda: gae_ts_mon.Distribution(_bucketer))
  jobs_max_pending_durations = defaultdict(lambda: 0.0)

  query_iter = task_result.get_result_summaries_query(
      None, None, 'created_ts', 'pending_running', None).iter(
          produce_cursors=True, start_cursor=params.cursor)

  while query_iter.has_next():
    runtime = (utils.utcnow() - params.start_time).total_seconds()
    if jobs_total >= _JOBS_PER_SHARD or runtime > _REQUEST_TIMEOUT_SEC:
      # Out of budget: hand the remainder of the query to a new task.
      params.cursor = query_iter.cursor_after()
      params.task_count += 1
      utils.enqueue_task(url='/internal/taskqueue/tsmon/jobs',
                         queue_name='tsmon',
                         payload=params.json())
      params.task_count -= 1  # For accurate logging below.
      break

    params.count += 1
    jobs_total += 1
    summary = query_iter.next()
    status = state_map.get(summary.state, '')
    fields = _extract_job_fields(summary.tags)
    target_fields = dict(_TARGET_FIELDS)
    if summary.bot_id:
      target_fields['hostname'] = 'autogen:' + summary.bot_id
    if summary.bot_id and status == 'running':
      _jobs_running.set(True, target_fields=target_fields, fields=fields)
    fields['status'] = status
    key = tuple(sorted(fields.iteritems()))
    jobs_counts[key] += 1
    pending_duration = summary.pending_now(utils.utcnow())
    if pending_duration is not None:
      jobs_pending_distributions[key].add(pending_duration.total_seconds())
      jobs_max_pending_durations[key] = max(
          jobs_max_pending_durations[key], pending_duration.total_seconds())

  logging.debug(
      '_set_jobs_metrics: task %d started at %s, processed %d jobs (%d total)',
      params.task_count, params.task_start, jobs_total, params.count)

  # Global counts are sharded by task_num and aggregated in queries.
  target_fields = dict(_TARGET_FIELDS)
  target_fields['task_num'] = params.task_count
  for key, count in jobs_counts.iteritems():
    _jobs_active.set(count, target_fields=target_fields, fields=dict(key))
  for key, distribution in jobs_pending_distributions.iteritems():
    _jobs_pending_durations.set(
        distribution, target_fields=target_fields, fields=dict(key))
  for key, val in jobs_max_pending_durations.iteritems():
    _jobs_max_pending_duration.set(
        val, target_fields=target_fields, fields=dict(key))
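
# Minimal sketch of the _ShardParams helper assumed above, inferred from its
# usage (cursor, start_time, task_start, task_count, count, json()); the real
# class may differ in detail.
import json

from google.appengine.datastore.datastore_query import Cursor


class _ShardParams(object):
  """Round-trips pagination state through the task queue payload."""

  def __init__(self, payload):
    data = json.loads(payload) if payload else {}
    self.start_time = utils.utcnow()  # Wall-clock start of this shard.
    self.task_start = data.get('task_start') or str(self.start_time)
    self.cursor = Cursor(urlsafe=data['cursor']) if data.get('cursor') else None
    self.task_count = data.get('task_count', 0)
    self.count = data.get('count', 0)

  def json(self):
    return json.dumps({
        'cursor': self.cursor.urlsafe() if self.cursor else None,
        'task_start': self.task_start,
        'task_count': self.task_count,
        'count': self.count,
    })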

def update_histogram(query, count_attribute, metric):
  """Sets |metric| to a histogram of |count_attribute| over |query| results."""
  # 10 fixed-width buckets of width 1, plus underflow/overflow buckets.
  dist = gae_ts_mon.Distribution(gae_ts_mon.FixedWidthBucketer(1, 10))
  for flake in query:
    dist.add(getattr(flake, count_attribute))
  metric.set(dist)
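
# Hypothetical usage sketch: the query, attribute name and metric are all
# illustrative stand-ins for whatever the surrounding module defines.
update_histogram(
    Flake.query(Flake.last_week == True),  # Assumed ndb model and filter.
    'count_last_week',                     # Assumed occurrence-count field.
    occurrences_per_flake_week)            # Assumed distribution metric.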