def _collect_per_statement_metrics(self, db):
    # type: (pymysql.connections.Connection) -> List[Metric]
    metrics = []  # type: List[Metric]

    def keyfunc(row):
        return (row['schema'], row['digest'])

    monotonic_rows = self._query_summary_per_statement(db)
    monotonic_rows = self._merge_duplicate_rows(monotonic_rows, key=keyfunc)
    rows = self._state.compute_derivative_rows(monotonic_rows, STATEMENT_METRICS.keys(), key=keyfunc)
    metrics.append(('dd.mysql.queries.query_rows_raw', len(rows), []))

    rows = generate_synthetic_rows(rows)
    rows = apply_row_limits(
        rows,
        self.config.statement_metrics_limits or DEFAULT_STATEMENT_METRICS_LIMITS,
        tiebreaker_metric='count',
        tiebreaker_reverse=True,
        key=keyfunc,
    )
    metrics.append(('dd.mysql.queries.query_rows_limited', len(rows), []))

    for row in rows:
        tags = []
        tags.append('digest:' + row['digest'])
        if row['schema'] is not None:
            tags.append('schema:' + row['schema'])

        try:
            obfuscated_statement = datadog_agent.obfuscate_sql(row['query'])
        except Exception as e:
            self.log.warning("Failed to obfuscate query '%s': %s", row['query'], e)
            continue

        tags.append('query_signature:' + compute_sql_signature(obfuscated_statement))
        tags.append('query:' + normalize_query_tag(obfuscated_statement).strip())

        for col, name in STATEMENT_METRICS.items():
            value = row[col]
            metrics.append((name, value, tags))

    return metrics

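# Illustrative sketch of the derivative step above (hypothetical helper name and
# signature; not the shipped compute_derivative_rows). The idea: performance_schema
# counters only ever increase, so per-interval values are obtained by diffing each
# metric column against the row seen on the previous check run, keyed by
# (schema, digest). Rows with no prior baseline are dropped until the next run.
def _sketch_compute_derivative_rows(previous_rows_by_key, current_rows, metric_columns, key):
    derived = []
    for row in current_rows:
        prior = previous_rows_by_key.get(key(row))
        if prior is None:
            continue  # first time we've seen this query; no delta to report yet
        diffed = dict(row)
        for col in metric_columns:
            diffed[col] = row[col] - prior[col]
        derived.append(diffed)
    return derived
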
def _normalize_queries(self, rows):
    normalized_rows = []
    for row in rows:
        normalized_row = dict(copy.copy(row))
        try:
            obfuscated_statement = datadog_agent.obfuscate_sql(row['query'])
        except Exception as e:
            # obfuscation errors are relatively common so only log them during debugging
            self._log.debug("Failed to obfuscate query '%s': %s", row['query'], e)
            continue

        normalized_row['query'] = obfuscated_statement
        normalized_row['query_signature'] = compute_sql_signature(obfuscated_statement)
        normalized_rows.append(normalized_row)

    return normalized_rows

def _normalize_queries(self, rows):
    normalized_rows = []
    for row in rows:
        normalized_row = dict(copy.copy(row))
        try:
            obfuscated_statement = datadog_agent.obfuscate_sql(row['digest_text'])
        except Exception as e:
            self.log.warning("Failed to obfuscate query '%s': %s", row['digest_text'], e)
            continue

        normalized_row['digest_text'] = obfuscated_statement
        normalized_row['query_signature'] = compute_sql_signature(obfuscated_statement)
        normalized_rows.append(normalized_row)

    return normalized_rows

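# Hypothetical stand-in for compute_sql_signature, shown only to illustrate the
# contract both _normalize_queries variants rely on: a short, stable hash of the
# obfuscated text, so the same normalized query always yields the same signature.
# The specific hash (SHA-256 truncated to 16 hex chars) is an assumption for
# illustration, not the real implementation.
import hashlib

def _sketch_compute_sql_signature(obfuscated_statement):
    return hashlib.sha256(obfuscated_statement.encode('utf-8')).hexdigest()[:16]
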
def obfuscate_sql_with_metadata(query, options=None):
    if not query:
        return {'query': '', 'metadata': {}}

    statement = datadog_agent.obfuscate_sql(query, options)
    # The `obfuscate_sql` testing stub returns bytes, so we have to handle that here.
    # The actual `obfuscate_sql` method in the agent's Go code returns a JSON string.
    statement = to_native_string(statement.strip())

    # Older agents may not have the new metadata API which returns a JSON string, so we must support cases where
    # newer integrations are running on an older agent. We use this "shortcut" to determine if we've received
    # a JSON string to avoid throwing excessive exceptions. We found that orjson leaks memory when failing
    # to parse these strings which are not valid json. Note, this condition is only relevant for integrations
    # running on agent versions < 7.34
    if not statement.startswith('{'):
        return {'query': statement, 'metadata': {}}

    statement_with_metadata = json.loads(statement)
    metadata = statement_with_metadata.get('metadata', {})
    tables = metadata.pop('tables_csv', None)
    tables = [table.strip() for table in tables.split(',') if table != ''] if tables else None
    statement_with_metadata['metadata']['tables'] = tables
    return statement_with_metadata

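# Example of the two return shapes obfuscate_sql_with_metadata handles
# (illustrative values; the exact metadata keys depend on the agent version):
#
#   obfuscate_sql_with_metadata("SELECT * FROM users WHERE id = 42")
#   # agent >= 7.34: obfuscate_sql returns JSON, so the result looks like
#   #   {'query': 'SELECT * FROM users WHERE id = ?', 'metadata': {'tables': ['users']}}
#   # older agents: obfuscate_sql returns the bare statement, so the result is
#   #   {'query': 'SELECT * FROM users WHERE id = ?', 'metadata': {}}
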
def _collect_plan_for_statement(self, row):
    # Plans have several important signatures to tag events with:
    # - `plan_signature` - hash computed from the normalized JSON plan to group identical plan trees
    # - `resource_hash` - hash computed off the raw sql text to match apm resources
    # - `query_signature` - hash computed from the digest text to match query metrics
    try:
        obfuscated_statement = datadog_agent.obfuscate_sql(row['sql_text'])
        obfuscated_digest_text = datadog_agent.obfuscate_sql(row['digest_text'])
    except Exception:
        # do not log the raw sql_text to avoid leaking sensitive data into logs. digest_text is safe as parameters
        # are obfuscated by the database
        self._log.debug("Failed to obfuscate statement: %s", row['digest_text'])
        self._check.count("dd.mysql.statement_samples.error", 1, tags=self._tags + ["error:sql-obfuscate"])
        return None

    apm_resource_hash = compute_sql_signature(obfuscated_statement)
    query_signature = compute_sql_signature(obfuscated_digest_text)

    query_cache_key = (row['current_schema'], query_signature)
    if query_cache_key in self._explained_statements_cache:
        return None
    self._explained_statements_cache[query_cache_key] = True

    plan = None
    with closing(self._get_db_connection().cursor()) as cursor:
        try:
            plan = self._explain_statement(cursor, row['sql_text'], row['current_schema'], obfuscated_statement)
        except Exception as e:
            self._check.count(
                "dd.mysql.statement_samples.error", 1, tags=self._tags + ["error:explain-{}".format(type(e))]
            )
            self._log.exception("Failed to explain statement: %s", obfuscated_statement)

    normalized_plan, obfuscated_plan, plan_signature, plan_cost = None, None, None, None
    if plan:
        normalized_plan = datadog_agent.obfuscate_sql_exec_plan(plan, normalize=True)
        obfuscated_plan = datadog_agent.obfuscate_sql_exec_plan(plan)
        plan_signature = compute_exec_plan_signature(normalized_plan)
        plan_cost = self._parse_execution_plan_cost(plan)

    query_plan_cache_key = (query_cache_key, plan_signature)
    if query_plan_cache_key not in self._seen_samples_cache:
        self._seen_samples_cache[query_plan_cache_key] = True
        return {
            "timestamp": row["timer_end_time_s"] * 1000,
            "host": self._db_hostname,
            "service": self._service,
            "ddsource": "mysql",
            "ddtags": self._tags_str,
            "duration": row['timer_wait_ns'],
            "network": {
                "client": {
                    "ip": row.get('processlist_host', None),
                }
            },
            "db": {
                "instance": row['current_schema'],
                "plan": {"definition": obfuscated_plan, "cost": plan_cost, "signature": plan_signature},
                "query_signature": query_signature,
                "resource_hash": apm_resource_hash,
                "statement": obfuscated_statement,
            },
            'mysql': {k: v for k, v in row.items() if k not in EVENTS_STATEMENTS_SAMPLE_EXCLUDE_KEYS},
        }

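# A minimal sketch of what _parse_execution_plan_cost could look like, assuming
# the plan is MySQL's EXPLAIN FORMAT=JSON output, where the top-level query_block
# carries a string-valued cost under cost_info.query_cost. This illustrates the
# expected plan shape, not the shipped helper.
import json

def _sketch_parse_execution_plan_cost(plan):
    cost = json.loads(plan).get('query_block', {}).get('cost_info', {}).get('query_cost', 0.0)
    return float(cost or 0.0)
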
def _collect_plan_for_statement(self, row):
    try:
        obfuscated_statement = datadog_agent.obfuscate_sql(row['query'])
    except Exception as e:
        self._log.debug("Failed to obfuscate statement: %s", e)
        self._check.count("dd.postgres.statement_samples.error", 1, tags=self._tags + ["error:sql-obfuscate"])
        return None

    # limit the rate of explains done to the database
    query_signature = compute_sql_signature(obfuscated_statement)
    if query_signature in self._explained_statements_cache:
        return None
    self._explained_statements_cache[query_signature] = True

    # Plans have several important signatures to tag events with. Note that for postgres, the
    # query_signature and resource_hash will be the same value.
    # - `plan_signature` - hash computed from the normalized JSON plan to group identical plan trees
    # - `resource_hash` - hash computed off the raw sql text to match apm resources
    # - `query_signature` - hash computed from the raw sql text to match query metrics
    plan_dict = self._run_explain(row['query'], obfuscated_statement)
    plan, normalized_plan, obfuscated_plan, plan_signature, plan_cost = None, None, None, None, None
    if plan_dict:
        plan = json.dumps(plan_dict)
        # if we're using the orjson implementation then json.dumps returns bytes
        plan = plan.decode('utf-8') if isinstance(plan, bytes) else plan
        normalized_plan = datadog_agent.obfuscate_sql_exec_plan(plan, normalize=True)
        obfuscated_plan = datadog_agent.obfuscate_sql_exec_plan(plan)
        plan_signature = compute_exec_plan_signature(normalized_plan)
        plan_cost = plan_dict.get('Plan', {}).get('Total Cost', 0.0) or 0.0

    statement_plan_sig = (query_signature, plan_signature)
    if statement_plan_sig not in self._seen_samples_cache:
        self._seen_samples_cache[statement_plan_sig] = True
        event = {
            "host": self._db_hostname,
            "service": self._service,
            "ddsource": "postgres",
            "ddtags": self._tags_str,
            "network": {
                "client": {
                    "ip": row.get('client_addr', None),
                    "port": row.get('client_port', None),
                    "hostname": row.get('client_hostname', None),
                }
            },
            "db": {
                "instance": row.get('datname', None),
                "plan": {"definition": obfuscated_plan, "cost": plan_cost, "signature": plan_signature},
                "query_signature": query_signature,
                "resource_hash": query_signature,
                "application": row.get('application_name', None),
                "user": row['usename'],
                "statement": obfuscated_statement,
            },
            'postgres': {k: v for k, v in row.items() if k not in pg_stat_activity_sample_exclude_keys},
        }
        event['timestamp'] = time.time() * 1000
        if row['state'] in {'idle', 'idle in transaction'}:
            if row['state_change'] and row['query_start']:
                event['duration'] = (row['state_change'] - row['query_start']).total_seconds() * 1e9
                # If the transaction is idle then we have a more specific "end time" than the current time at
                # which we're collecting this event. According to the postgres docs, all of the timestamps in
                # pg_stat_activity are `timestamp with time zone` so the timezone should always be present. However,
                # if there is something wrong and it's missing then we can't use `state_change` for the timestamp
                # of the event else we risk the timestamp being significantly off and the event getting dropped
                # during ingestion.
                if row['state_change'].tzinfo:
                    event['timestamp'] = get_timestamp(row['state_change']) * 1000
        return event

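# Hypothetical sketch of compute_exec_plan_signature: two executions that produce
# the same normalized plan tree must hash to the same value, so the signature is
# computed over a canonical serialization of the normalized JSON plan. The
# canonicalization (sorted keys) and hash shown here are assumptions.
import hashlib
import json

def _sketch_compute_exec_plan_signature(normalized_json_plan):
    if not normalized_json_plan:
        return None
    canonical = json.dumps(json.loads(normalized_json_plan), sort_keys=True)
    return hashlib.sha256(canonical.encode('utf-8')).hexdigest()[:16]
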
def _collect_per_statement_metrics(self, db):
    metrics = []

    available_columns = self._get_pg_stat_statements_columns(db)
    missing_columns = PG_STAT_STATEMENTS_REQUIRED_COLUMNS - set(available_columns)
    if len(missing_columns) > 0:
        self.log.warning(
            'Unable to collect statement metrics because required fields are unavailable: %s',
            ', '.join(list(missing_columns)),
        )
        return metrics

    desired_columns = (
        list(PG_STAT_STATEMENTS_METRIC_COLUMNS.keys())
        + list(PG_STAT_STATEMENTS_OPTIONAL_COLUMNS)
        + list(PG_STAT_STATEMENTS_TAG_COLUMNS.keys())
    )
    query_columns = list(set(desired_columns) & set(available_columns) | set(PG_STAT_STATEMENTS_TAG_COLUMNS.keys()))
    rows = self._execute_query(
        db.cursor(cursor_factory=psycopg2.extras.DictCursor),
        STATEMENTS_QUERY.format(
            cols=', '.join(query_columns),
            pg_stat_statements_view=self.config.pg_stat_statements_view,
            limit=DEFAULT_STATEMENTS_LIMIT,
        ),
        params=(self.config.dbname,),
    )
    if not rows:
        return metrics

    def row_keyfunc(row):
        # old versions of pg_stat_statements don't have a query ID so fall back to the query string itself
        queryid = row['queryid'] if 'queryid' in row else row['query']
        return (queryid, row['datname'], row['rolname'])

    rows = self._state.compute_derivative_rows(rows, PG_STAT_STATEMENTS_METRIC_COLUMNS.keys(), key=row_keyfunc)
    rows = apply_row_limits(
        rows, DEFAULT_STATEMENT_METRIC_LIMITS, tiebreaker_metric='calls', tiebreaker_reverse=True, key=row_keyfunc
    )

    for row in rows:
        try:
            normalized_query = datadog_agent.obfuscate_sql(row['query'])
            if not normalized_query:
                self.log.warning("Obfuscation of query '%s' resulted in empty query", row['query'])
                continue
        except Exception as e:
            # If query obfuscation fails, it is acceptable to log the raw query here because the
            # pg_stat_statements table contains no parameters in the raw queries.
            self.log.warning("Failed to obfuscate query '%s': %s", row['query'], e)
            continue

        query_signature = compute_sql_signature(normalized_query)

        # All "Deep Database Monitoring" statement-level metrics are tagged with a `query_signature`
        # which uniquely identifies the normalized query family. Where possible, this hash should
        # match the hash of APM "resources" (https://docs.datadoghq.com/tracing/visualization/resource/)
        # when the resource is a SQL query. Postgres' query normalization in the `pg_stat_statements` table
        # preserves most of the original query, so we tag the `resource_hash` with the same value as the
        # `query_signature`. The `resource_hash` tag should match the *actual* APM resource hash most of
        # the time, but not always. So this is a best-effort approach to link these metrics to APM metrics.
        tags = ['query_signature:' + query_signature, 'resource_hash:' + query_signature]

        for column, tag_name in PG_STAT_STATEMENTS_TAG_COLUMNS.items():
            if column not in row:
                continue
            value = row[column]
            if column == 'query':
                value = normalize_query_tag(normalized_query)
            tags.append('{tag_name}:{value}'.format(tag_name=tag_name, value=value))

        for column, metric_name in PG_STAT_STATEMENTS_METRIC_COLUMNS.items():
            if column not in row:
                continue
            value = row[column]
            if column == 'total_time':
                # All "Deep Database Monitoring" timing metrics are in nanoseconds
                # Postgres tracks pg_stat* timing stats in milliseconds
                value = milliseconds_to_nanoseconds(value)
            metrics.append((metric_name, value, tags))

    return metrics

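# Illustrative sketch of the apply_row_limits semantics assumed above
# (hypothetical body, not the shipped helper): for each metric in the limits map,
# keep the top-K and bottom-K rows ranked by that metric, breaking ties on the
# tiebreaker metric, and return the deduplicated union - a row only needs to be
# an outlier on one metric to survive the cut.
def _sketch_apply_row_limits(rows, metric_limits, tiebreaker_metric, tiebreaker_reverse, key):
    kept = {}  # key(row) -> row; keyed so rows kept by several metrics appear once
    sign = -1 if tiebreaker_reverse else 1
    for metric, (top_k, bottom_k) in metric_limits.items():
        ordered = sorted(rows, key=lambda r: (r[metric], sign * r[tiebreaker_metric]))
        for row in ordered[:bottom_k] + (ordered[-top_k:] if top_k else []):
            kept[key(row)] = row
    return list(kept.values())
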
def _collect_plan_for_statement(self, row):
    try:
        obfuscated_statement = datadog_agent.obfuscate_sql(row['query'])
    except Exception as e:
        self._log.debug("Failed to obfuscate statement: %s", e)
        self._check.count("dd.postgres.statement_samples.error", 1, tags=self._tags + ["error:sql-obfuscate"])
        return None

    # limit the rate of explains done to the database
    query_signature = compute_sql_signature(obfuscated_statement)
    if query_signature in self._explained_statements_cache:
        return None
    self._explained_statements_cache[query_signature] = True

    # Plans have several important signatures to tag events with. Note that for postgres, the
    # query_signature and resource_hash will be the same value.
    # - `plan_signature` - hash computed from the normalized JSON plan to group identical plan trees
    # - `resource_hash` - hash computed off the raw sql text to match apm resources
    # - `query_signature` - hash computed from the raw sql text to match query metrics
    plan_dict = self._run_explain(row['query'], obfuscated_statement)
    plan, normalized_plan, obfuscated_plan, plan_signature, plan_cost = None, None, None, None, None
    if plan_dict:
        plan = json.dumps(plan_dict)
        normalized_plan = datadog_agent.obfuscate_sql_exec_plan(plan, normalize=True)
        obfuscated_plan = datadog_agent.obfuscate_sql_exec_plan(plan)
        plan_signature = compute_exec_plan_signature(normalized_plan)
        plan_cost = plan_dict.get('Plan', {}).get('Total Cost', 0.0) or 0.0

    statement_plan_sig = (query_signature, plan_signature)
    if statement_plan_sig not in self._seen_samples_cache:
        self._seen_samples_cache[statement_plan_sig] = True
        event = {
            "host": self._db_hostname,
            "service": self._service,
            "ddsource": "postgres",
            "ddtags": self._tags_str,
            "network": {
                "client": {
                    "ip": row.get('client_addr', None),
                    "port": row.get('client_port', None),
                    "hostname": row.get('client_hostname', None),
                }
            },
            "db": {
                "instance": row.get('datname', None),
                "plan": {"definition": obfuscated_plan, "cost": plan_cost, "signature": plan_signature},
                "query_signature": query_signature,
                "resource_hash": query_signature,
                "application": row.get('application_name', None),
                "user": row['usename'],
                "statement": obfuscated_statement,
            },
            'postgres': {k: v for k, v in row.items() if k not in pg_stat_activity_sample_exclude_keys},
        }
        if row['state'] in {'idle', 'idle in transaction'}:
            if row['state_change'] and row['query_start']:
                event['duration'] = (row['state_change'] - row['query_start']).total_seconds() * 1e9
                event['timestamp'] = time.mktime(row['state_change'].timetuple()) * 1000
        else:
            event['timestamp'] = time.time() * 1000
        return event