def _record_timing(self, status):
    """Record restore timing: log slow or 412 restores, emit datadog counters.

    :param status: HTTP status code of the restore response.
    """
    timing = self.timing_context
    assert timing.is_finished()
    duration = timing.duration
    device_id = self.params.device_id
    # log only restores that are slow or that failed with 412 (sync purged)
    if duration > 20 or status == 412:
        if status == 412:
            # use last sync log since there is no current sync log
            sync_log_id = self.params.sync_log_id or 'N/A'
        else:
            sync_log = self.restore_state.current_sync_log
            sync_log_id = sync_log._id if sync_log else 'N/A'
        log = logging.getLogger(__name__)
        log.info(
            "restore %s: user=%s device=%s domain=%s status=%s duration=%.3f",
            sync_log_id,
            self.restore_user.username,
            device_id,
            self.domain,
            status,
            duration,
        )
    is_webapps = device_id and device_id.startswith("WebAppsLogin")
    tags = [
        'status_code:{}'.format(status),
        'device_type:{}'.format('webapps' if is_webapps else 'other'),
    ]
    maybe_add_domain_tag(self.domain, tags)
    timer_buckets = (1, 5, 20, 60, 120, 300, 600)
    for timer in timing.to_list(exclude_root=True):
        if timer.name in RESTORE_SEGMENTS:
            segment = RESTORE_SEGMENTS[timer.name]
            bucket = bucket_value(timer.duration, timer_buckets, 's')
            datadog_counter(
                'commcare.restores.{}'.format(segment),
                tags=tags + ['duration:%s' % bucket],
            )
        elif timer.name.startswith('fixture:'):
            bucket = bucket_value(timer.duration, timer_buckets, 's')
            datadog_counter(
                'commcare.restores.fixture',
                tags=tags + [
                    'duration:%s' % bucket,
                    timer.name,
                ],
            )
    # BUG FIX: without parentheses this parsed as
    #   ('type:%s' % 'sync') if self.params.sync_log_id else 'restore'
    # so the restore case appended the bare tag 'restore' instead of
    # 'type:restore'.  The conditional must be grouped before formatting.
    tags.append('type:%s' % ('sync' if self.params.sync_log_id else 'restore'))
    tags.append('duration:%s' % bucket_value(timing.duration, timer_buckets, 's'))
    if settings.ENTERPRISE_MODE and self.params.app and self.params.app.copy_of:
        app_name = slugify(self.params.app.name)
        tags.append('app:{}-{}'.format(app_name, self.params.app.version))
    datadog_counter('commcare.restores.count', tags=tags)
def _record_timing(self, status):
    """Record restore timing: log slow or 412 restores, emit datadog counters.

    :param status: HTTP status code of the restore response.
    """
    timing = self.timing_context
    # BUG FIX: the rest of this method explicitly handles `timing is None`
    # (duration falls back to -1, the timer loop is guarded), but the
    # original `assert timing.is_finished()` dereferenced timing
    # unconditionally and raised AttributeError on None instead.
    assert timing is None or timing.is_finished()
    duration = timing.duration if timing is not None else -1
    device_id = self.params.device_id
    # log only restores that are slow or that failed with 412 (sync purged)
    if duration > 20 or status == 412:
        if status == 412:
            # use last sync log since there is no current sync log
            sync_log_id = self.params.sync_log_id or 'N/A'
        else:
            sync_log = self.restore_state.current_sync_log
            sync_log_id = sync_log._id if sync_log else 'N/A'
        log = logging.getLogger(__name__)
        log.info(
            "restore %s: user=%s device=%s domain=%s status=%s duration=%.3f",
            sync_log_id,
            self.restore_user.username,
            device_id,
            self.domain,
            status,
            duration,
        )
    is_webapps = device_id and device_id.startswith("WebAppsLogin")
    tags = [
        'status_code:{}'.format(status),
        'device_type:{}'.format('webapps' if is_webapps else 'other'),
    ]
    env = settings.SERVER_ENVIRONMENT
    # domain is high-cardinality, so only tag it for opted-in (env, domain) pairs
    if (env, self.domain) in settings.RESTORE_TIMING_DOMAINS:
        tags.append('domain:{}'.format(self.domain))
    if timing is not None:
        timer_buckets = (5, 20, 60, 120)
        for timer in timing.to_list(exclude_root=True):
            if timer.name in RESTORE_SEGMENTS:
                segment = RESTORE_SEGMENTS[timer.name]
                bucket = bucket_value(timer.duration, timer_buckets, 's')
                datadog_counter(
                    'commcare.restores.{}'.format(segment),
                    tags=tags + ['duration:%s' % bucket],
                )
            elif timer.name.startswith('fixture:'):
                bucket = bucket_value(timer.duration, timer_buckets, 's')
                datadog_counter(
                    'commcare.restores.fixture',
                    tags=tags + [
                        'duration:%s' % bucket,
                        timer.name,
                    ],
                )
        tags.append('duration:%s' % bucket_value(timing.duration, timer_buckets, 's'))
    if settings.ENTERPRISE_MODE and self.params.app and self.params.app.copy_of:
        app_name = slugify(self.params.app.name)
        tags.append('app:{}-{}'.format(app_name, self.params.app.version))
    datadog_counter('commcare.restores.count', tags=tags)
def _send_timings(self, timing_context):
    """Emit one duration-bucketed counter per timer in the context.

    Timers carrying a ``normalize_denominator`` attribute additionally get
    a ``.normalized`` counter bucketed on per-item duration.
    """
    for node in timing_context.to_list():
        bucket = bucket_value(node.duration, TIMING_BUCKETS)
        datadog_counter(
            "commcare.%s.count" % node.full_name,
            tags=['duration:%s' % bucket],
        )
        denominator = getattr(node, 'normalize_denominator', None)
        if denominator:
            normalized_bucket = bucket_value(
                node.duration / denominator, NORMALIZED_TIMING_BUCKETS)
            datadog_counter(
                "commcare.%s.count.normalized" % node.full_name,
                tags=['duration:%s' % normalized_bucket],
            )
def _send_timings(self, timing_context):
    """Record a counter (tagged with its duration bucket) for every timer.

    A normalized per-item counter is also recorded when the timer exposes
    a truthy ``normalize_denominator``.
    """
    counter_name = "commcare.%s.count"
    normalized_counter_name = "commcare.%s.count.normalized"
    for timer in timing_context.to_list():
        datadog_counter(
            counter_name % timer.full_name,
            tags=['duration:%s' % bucket_value(timer.duration, TIMING_BUCKETS)],
        )
        denom = getattr(timer, 'normalize_denominator', None)
        if denom:
            per_item_duration = timer.duration / denom
            datadog_counter(
                normalized_counter_name % timer.full_name,
                tags=['duration:%s' % bucket_value(
                    per_item_duration, NORMALIZED_TIMING_BUCKETS)],
            )
def _record_metrics(tags, submission_type, response, result=None, timer=None):
    """Count an xform submission, tagging type, status, and (for successful
    201 responses with a result) bucketed duration/case/ledger counts.

    NOTE: mutates the caller's ``tags`` list in place, as the original did.
    """
    tags.extend([
        'submission_type:{}'.format(submission_type),
        'status_code:{}'.format(response.status_code),
    ])
    if response.status_code == 201 and timer and result:
        tags.extend([
            'duration:%s' % bucket_value(timer.duration, (5, 10, 20), 's'),
            'case_count:%s' % bucket_value(len(result.cases), (2, 5, 10)),
            'ledger_count:%s' % bucket_value(len(result.ledgers), (2, 5, 10)),
        ])
    datadog_counter('commcare.xform_submissions.count', tags=tags)
def _record_metrics(tags, submission_type, response, result=None, timer=None):
    """Record the xform submission counter with per-submission tags.

    Successful (201) submissions with timing and a result also get bucketed
    duration, case count, and ledger count tags.  Mutates ``tags`` in place.
    """
    tags.append('submission_type:{}'.format(submission_type))
    tags.append('status_code:{}'.format(response.status_code))
    succeeded = response.status_code == 201
    if succeeded and timer and result:
        tags.append('duration:%s' % bucket_value(timer.duration, (5, 10, 20), 's'))
        tags.append('case_count:%s' % bucket_value(len(result.cases), (2, 5, 10)))
        tags.append('ledger_count:%s' % bucket_value(len(result.ledgers), (2, 5, 10)))
    datadog_counter('commcare.xform_submissions.count', tags=tags)
def new_stop(name=None):
    """Stop the wrapped timer, invoke any callback, and emit the counter
    tagged with the timer's duration bucket."""
    original_stop(name)
    if callback:
        callback(timer.duration)
    duration_bucket = bucket_value(timer.duration, timing_buckets, 's')
    datadog_counter(metric, tags=list(tags) + ['duration:%s' % duration_bucket])
def new_stop(name=None):
    """Wrapped stop(): delegate to the original, then report the elapsed
    duration via the optional callback and a bucketed datadog counter."""
    original_stop(name)
    if callback:
        callback(timer.duration)
    all_tags = [
        *tags,
        'duration:%s' % bucket_value(timer.duration, timing_buckets, 's'),
    ]
    datadog_counter(metric, tags=all_tags)
def _send_timings(self):
    """Emit duration-bucketed counters for each timer in ``self.timing``,
    plus a normalized counter when a denominator is available."""
    for timer in self.timing.to_list():
        datadog_counter(
            "commcare.%s.count" % timer.full_name,
            tags=['duration:%s' % bucket_value(timer.duration, TIMING_BUCKETS)],
        )
        if getattr(timer, "normalize_denominator", 0):
            per_item = timer.duration / timer.normalize_denominator
            datadog_counter(
                "commcare.%s.count.normalized" % timer.full_name,
                tags=['duration:%s' % bucket_value(
                    per_item, NORMALIZED_TIMING_BUCKETS)],
            )
def __exit__(self, exc_type, exc_val, exc_tb):
    """On context exit, count success or failure; when duration buckets are
    configured, tag the counter with the elapsed-time bucket."""
    tags = None
    if self.duration_buckets:
        elapsed = time.time() - self.timer_start
        bucket = bucket_value(elapsed, self.duration_buckets, unit='s')
        tags = ['duration:{}'.format(bucket)]
    # an exception type means the body failed; otherwise it succeeded
    metric = self.failed_name if exc_type else self.succeeded_name
    datadog_counter(metric, tags=tags)
def _record_metrics(tags, submission_type, response, timer=None, xform=None):
    """Count an xform submission, tagging receipt lag (in day buckets),
    submission type, status code, and duration.  Mutates ``tags`` in place."""
    has_lag_info = (
        xform and xform.metadata and xform.metadata.timeEnd and xform.received_on
    )
    if has_lag_info:
        # lag between the device finishing the form and the server receiving it
        lag_days = (xform.received_on - xform.metadata.timeEnd).total_seconds() / 86400
        tags.append('lag:%s' % bucket_value(lag_days, (1, 2, 4, 7, 14, 31, 90), 'd'))
    tags.extend([
        'submission_type:{}'.format(submission_type),
        'status_code:{}'.format(response.status_code),
    ])
    if timer:
        tags.append('duration:%s' % bucket_value(
            timer.duration, (1, 5, 20, 60, 120, 300, 600), 's'))
    datadog_counter('commcare.xform_submissions.count', tags=tags)
def _record_metrics(tags, submission_type, response, timer=None):
    """Tag and count an xform submission; 201 responses with a timer also
    get a bucketed duration tag.  Mutates ``tags`` in place."""
    tags.extend([
        'submission_type:{}'.format(submission_type),
        'status_code:{}'.format(response.status_code),
    ])
    if timer and response.status_code == 201:
        duration_bucket = bucket_value(
            timer.duration, (1, 5, 20, 60, 120, 300, 600), 's')
        tags.append('duration:%s' % duration_bucket)
    datadog_counter('commcare.xform_submissions.count', tags=tags)
def __exit__(self, exc_type, exc_val, exc_tb):
    """Emit the success or failure counter on exit, optionally tagged with
    the bucketed elapsed duration."""
    if self.duration_buckets:
        elapsed = time.time() - self.timer_start
        tags = ['duration:{}'.format(
            bucket_value(elapsed, self.duration_buckets, unit='s'))]
    else:
        tags = None
    if exc_type:
        datadog_counter(self.failed_name, tags=tags)
    else:
        datadog_counter(self.succeeded_name, tags=tags)
def rate_limit_submission_by_delaying(domain, max_wait):
    """When *domain* is over its submission rate limit, block (up to
    ``max_wait`` seconds) waiting for capacity and record how long the
    delay took; usage is reported to the limiter afterwards either way."""
    if not submission_rate_limiter.allow_usage(domain):
        with TimingContext() as timer:
            acquired = submission_rate_limiter.wait(domain, timeout=max_wait)
        duration_tag = (
            bucket_value(timer.duration, [1, 5, 10, 15, 20], unit='s')
            if acquired else 'timeout'
        )
        datadog_counter('commcare.xform_submissions.rate_limited.test', tags=[
            'domain:{}'.format(domain),
            'duration:{}'.format(duration_tag),
        ])
    submission_rate_limiter.report_usage(domain)
def _commit_timing(queryset):
    """Wrap evaluation of *queryset*; after the wrapped work completes, send
    per-timer duration counters to datadog — but only when this call is the
    one that actually evaluated the query (cache transitioned None -> filled)."""
    # only send to datadog on initial query evaluation
    should_commit = queryset._mptt_set._result_cache is None
    try:
        yield
    finally:
        if should_commit and queryset._mptt_set._result_cache is not None:
            timing = queryset._timing
            for key in timing.timers:
                duration_bucket = bucket_value(timing.duration(key), TIME_BUCKETS, "s")
                datadog_counter(
                    'commcare.locations.%s.%s.count' % (timing.name, key),
                    tags=['duration:%s' % duration_bucket],
                )
def _delay_and_report_rate_limit_submission(domain, max_wait, datadog_metric):
    """Wait up to ``max_wait`` seconds for rate-limiter capacity for *domain*,
    report the outcome under *datadog_metric*, and return whether capacity
    was acquired."""
    with TimingContext() as timer:
        acquired = submission_rate_limiter.wait(domain, timeout=max_wait)
    if acquired:
        duration_tag = bucket_value(timer.duration, [.5, 1, 5, 10, 15], unit='s')
    else:
        # distinguish an immediate rejection from one that waited out max_wait
        duration_tag = 'quick_reject' if timer.duration < max_wait else 'delayed_reject'
    throttle_method = "delay" if acquired else "reject"
    datadog_counter(datadog_metric, tags=[
        f'domain:{domain}',
        f'duration:{duration_tag}',
        f'throttle_method:{throttle_method}',
    ])
    return acquired
def _report_import_timings(self, timer, results):
    """Report case-importer throughput: per-status row counters tagged with
    the bucketed active (non-throttled) duration per case."""
    # time actually spent importing, excluding throttle-induced sleeps
    active_duration = timer.duration - self._total_delayed_duration
    created = results['created_count']
    updated = results['match_count']
    failed = results['failed_count']
    # Add 1 to smooth / prevent denominator from ever being zero
    per_case = active_duration / (created + updated + failed + 1)
    per_case_bucket = bucket_value(
        per_case * 1000, [50, 70, 100, 150, 250, 350, 500], unit='ms')
    for count, status in ((created, 'created'),
                          (updated, 'updated'),
                          (failed, 'error')):
        datadog_counter('commcare.case_importer.cases', count, tags=[
            'active_duration_per_case:{}'.format(per_case_bucket),
            'status:{}'.format(status),
        ])
def pre_submit_hook(self):
    """If the domain is currently rate limited, sleep before submitting and
    record the delay in datadog."""
    if not rate_limit_submission(self.domain):
        return
    # the duration of the last submission is a combined heuristic
    # for the amount of load on the databases
    # and the amount of load that the requests from this import put on the databases.
    # The amount of time to wait, during a high submission period
    # and while this project is using up more than its fair share
    # should be proportional to this heuristic.
    # For a fully throttled domain, this will up to double
    # the amount of time the case import takes
    delay_bucket = bucket_value(
        self._last_submission_duration, [5, 7, 10, 15, 25, 35, 50], unit='s')
    datadog_counter(
        'commcare.case_importer.import_delays',
        tags=[
            'domain:{}'.format(self.domain),
            'duration:{}'.format(delay_bucket),
        ]
    )
    self._total_delayed_duration += self._last_submission_duration
    time.sleep(self._last_submission_duration)
def _record_metrics(tags, submission_type, response, timer=None, xform=None):
    """Emit the submission-lag gauge plus the tagged xform submission counter.

    Mutates the caller's ``tags`` list in place (gauge is sent with the
    pre-extension tags, matching the original ordering).
    """
    # BUG FIX: also require timeEnd and received_on before subtracting;
    # either may be unset, and `None - None` / datetime-minus-None raises
    # TypeError.  This matches the guard used by the sibling
    # _record_metrics variant that buckets lag into day ranges.
    if xform and xform.metadata and xform.metadata.timeEnd and xform.received_on:
        lag = xform.received_on - xform.metadata.timeEnd
        datadog_gauge('commcare.xform_submissions.lag', int(lag.total_seconds()),
                      tags=tags)
    tags += [
        'submission_type:{}'.format(submission_type),
        'status_code:{}'.format(response.status_code)
    ]
    if response.status_code == 201 and timer:
        tags += [
            'duration:%s' % bucket_value(timer.duration, (1, 5, 20, 60, 120, 300, 600), 's'),
        ]
    datadog_counter('commcare.xform_submissions.count', tags=tags)
def _histogram(self, name: str, value: float, bucket_tag: str, buckets: List[int],
               bucket_unit: str = '', tags: Dict[str, str] = None,
               documentation: str = ''):
    """Tag-based histogram: increment *name* by 1, with *value* folded into
    a ``{bucket_tag}:{bucket}`` tag.

    This implementation of histogram uses tagging to record the buckets;
    it does not use the Datadog Histogram metric type.  For example: ::

        h = metrics_histogram(
            'commcare.request.duration', 1.4,
            bucket_tag='duration', buckets=[1,2,3], bucket_units='ms',
            tags=tags
        )

        # resulting metrics
        # commcare.request.duration:1|c|#duration:lt_2ms

    For more explanation about why this implementation was chosen see:

    * https://github.com/dimagi/commcare-hq/pull/17080
    * https://github.com/dimagi/commcare-hq/pull/17030#issuecomment-315794700
    """
    formatted_tags = _format_tags(tags) or []
    bucket = bucket_value(value, buckets, bucket_unit)
    formatted_tags.append(f'{bucket_tag}:{bucket}')
    _datadog_record(statsd.increment, name, 1, formatted_tags)
def record_feed_access_in_datadog(request, config_id, duration, response):
    """Count one OData feed access, tagged with feed identity, result shape
    (rows/columns/bytes), and both raw and bucketed duration."""
    config = ExportInstance.get(config_id)
    username = request.couch_user.username
    payload = json.loads(response.content.decode('utf-8'))
    rows = payload['value']
    row_count = len(rows)
    try:
        column_count = len(rows[0])
    except IndexError:
        # empty feed: no first row to measure
        column_count = 0
    feed_tags = [
        'domain:{}'.format(request.domain),
        'feed_id:{}'.format(config_id),
        'feed_type:{}'.format(config.type),
        'username:{}'.format(username),
        'row_count:{}'.format(row_count),
        'column_count:{}'.format(column_count),
        'size:{}'.format(len(response.content)),
        'duration:{}'.format(duration),
        'duration_bucket:{}'.format(
            bucket_value(duration, (1, 5, 20, 60, 120, 300, 600), 's')),
    ]
    datadog_counter('commcare.odata_feed.test_v3', tags=feed_tags)
def test_bucket_value(self, value, buckets, unit, expected):
    """Parameterized check: bucket_value yields the expected bucket label."""
    actual = bucket_value(value, buckets, unit)
    self.assertEqual(actual, expected)
def test_bucket_value(self, value, buckets, unit, expected):
    """bucket_value(value, buckets, unit) should equal *expected*."""
    result = bucket_value(value, buckets, unit)
    self.assertEqual(result, expected)