def __init__(self, name, init_config, instances):
    """Read the connection settings from the instance and build the query manager.

    The three built-in metric queries are registered unless the instance sets
    ``only_custom_queries``.
    """
    super(Oracle, self).__init__(name, init_config, instances)

    settings = self.instance
    self._server = settings.get('server')
    self._user = settings.get('user')
    self._password = settings.get('password')
    self._service = settings.get('service_name')
    self._jdbc_driver = settings.get('jdbc_driver_path')
    self._tags = settings.get('tags') or []

    # Service checks carry the server tag followed by the instance tags.
    server_tag = 'server:{}'.format(self._server)
    self._service_check_tags = [server_tag]
    self._service_check_tags += self._tags

    self._cached_connection = None

    if settings.get('only_custom_queries', False):
        default_queries = []
    else:
        default_queries = [
            queries.ProcessMetrics,
            queries.SystemMetrics,
            queries.TableSpaceMetrics,
        ]

    self._fix_custom_queries()

    self._query_manager = QueryManager(
        self,
        self.execute_query_raw,
        queries=default_queries,
        error_handler=self.handle_query_error,
        tags=self._tags,
    )

    # Both hooks are queued in check_initializations, validation first.
    for hook in (self.validate_config, self._query_manager.compile_queries):
        self.check_initializations.append(hook)

    self._current_errors = 0
def __init__(self, *args, **kwargs):
    # type: (*Any, **Any) -> None
    """Build the check configuration and register the metric queries.

    The version metadata query is only registered when metadata collection
    is enabled.
    """
    super(RethinkDBCheck, self).__init__(*args, **kwargs)

    self._config = Config(cast(Instance, self.instance))

    if self._config.password:
        self.register_secret(self._config.password)

    self._conn = None  # type: Optional[rethinkdb.net.Connection]

    manager_queries = [
        queries.ClusterMetrics,
        queries.ServerMetrics,
        queries.DatabaseConfigMetrics,
        queries.DatabaseTableMetrics,
        queries.TableConfigMetrics,
        queries.ReplicaMetrics,
        queries.ShardMetrics,
        queries.JobMetrics,
        queries.CurrentIssuesMetrics,
    ]  # type: list

    # `is_metadata_collection_enabled` has to be *called*: testing the bound
    # method itself is always truthy, which made this branch unconditional.
    if self.is_metadata_collection_enabled():
        manager_queries.append(queries.VersionMetadata)

    self._query_manager = QueryManager(
        self,
        executor=self._execute_raw_query,
        queries=manager_queries,
        tags=self._config.tags,
    )
    self._query_funcs = {}  # type: Dict[str, Callable]

    self.check_initializations.append(self._query_manager.compile_queries)
def __init__(self, *args, **kwargs):
    """Resolve the configured metric groups into queries and build the query manager.

    Raises:
        ConfigurationError: when none of the configured metric groups is known.
    """
    super(SnowflakeCheck, self).__init__(*args, **kwargs)
    self.config = Config(self.instance)
    self._conn = None

    # Every metric carries the account tag on top of the configured tags.
    account_tag = 'account:{}'.format(self.config.account)
    self._tags = self.config.tags + [account_tag]

    if self.config.password:
        self.register_secret(self.config.password)

    self.metric_queries = []
    self.errors = []
    for group_name in self.config.metric_groups:
        if group_name in METRIC_GROUPS:
            self.metric_queries.extend(METRIC_GROUPS[group_name])
        else:
            # Unknown groups are collected and reported once, below.
            self.errors.append(group_name)

    if self.errors:
        self.log.warning('Invalid metric_groups found in snowflake conf.yaml: %s', (', '.join(self.errors)))
    if not self.metric_queries:
        raise ConfigurationError('No valid metric_groups configured, please list at least one.')

    self._query_manager = QueryManager(
        self,
        self.execute_query_raw,
        queries=self.metric_queries,
        tags=self._tags,
    )
    self.check_initializations.append(self._query_manager.compile_queries)
def __init__(self, name, init_config, instances):
    """Initialise state and register the default plus opt-in query groups."""
    super(TeradataCheck, self).__init__(name, init_config, instances)

    self._connect_params = None
    self._connection = None
    self._tags = []
    self._query_errors = 0
    self._tables_filter = None

    # Work on a private copy so the module-level defaults are never extended.
    manager_queries = deepcopy(DEFAULT_QUERIES)

    # (instance option, queries appended when the option is affirmative)
    optional_groups = (
        ('collect_res_usage_metrics', COLLECT_RES_USAGE),
        ('collect_table_disk_metrics', COLLECT_ALL_SPACE),
    )
    for option, extra_queries in optional_groups:
        if is_affirmative(self.instance.get(option, False)):
            manager_queries.extend(extra_queries)

    self._query_manager = QueryManager(
        self,
        self._execute_query_raw,
        queries=manager_queries,
        tags=self._tags,
        error_handler=self._executor_error_handler,
    )

    for hook in (self.initialize_config, self._query_manager.compile_queries):
        self.check_initializations.append(hook)
def __init__(self, name, init_config, instances):
    """Initialise check state, autodiscovery settings and the custom-query manager."""
    super(SQLServer, self).__init__(name, init_config, instances)

    instance = self.instance

    self.connection = None
    self.failed_connections = {}
    self.instance_metrics = []
    self.instance_per_type_metrics = defaultdict(list)
    self.do_check = True

    # Database autodiscovery settings; the patterns are compiled right after
    # the include/exclude lists are stored.
    self.autodiscovery = is_affirmative(instance.get('database_autodiscovery'))
    self.autodiscovery_include = instance.get('autodiscovery_include', ['.*'])
    self.autodiscovery_exclude = instance.get('autodiscovery_exclude', [])
    self._compile_patterns()
    self.autodiscovery_interval = instance.get('autodiscovery_interval', DEFAULT_AUTODISCOVERY_INTERVAL)
    self.databases = set()
    self.ad_last_check = 0

    self.proc = instance.get('stored_procedure')
    self.proc_type_mapping = dict(gauge=self.gauge, rate=self.rate, histogram=self.histogram)
    self.custom_metrics = init_config.get('custom_metrics', [])

    # The query manager starts with an empty query list; it is used to
    # process custom queries.
    self._query_manager = QueryManager(
        self,
        self.execute_query_raw,
        queries=[],
        tags=instance.get("tags", []),
    )

    startup_hooks = (
        self.config_checks,
        self._query_manager.compile_queries,
        self.initialize_connection,
    )
    for hook in startup_hooks:
        self.check_initializations.append(hook)
def __init__(self, name, init_config, instances):
    """Unpack the instance settings via ``_get_config`` and build the query manager."""
    super(Oracle, self).__init__(name, init_config, instances)

    server, user, password, service, jdbc_driver, tags, only_custom_queries = self._get_config(self.instance)
    self._server = server
    self._user = user
    self._password = password
    self._service = service
    self._jdbc_driver = jdbc_driver
    self._tags = tags

    self.check_initializations.append(self.validate_config)

    self._connection = None

    # The built-in queries are skipped when only custom queries are requested.
    if only_custom_queries:
        default_queries = []
    else:
        default_queries = [
            queries.ProcessMetrics,
            queries.SystemMetrics,
            queries.TableSpaceMetrics,
        ]

    self._fix_custom_queries()

    self._query_manager = QueryManager(
        self,
        self.execute_query_raw,
        queries=default_queries,
        tags=self._tags,
    )
    self.check_initializations.append(self._query_manager.compile_queries)
def __init__(self, name, init_config, instances):
    # type: (str, dict, list) -> None
    """Create the HTTP client and the query manager with the fixed metric query set."""
    super(VoltDBCheck, self).__init__(name, init_config, instances)

    config = Config(cast(Instance, self.instance), debug=self.log.debug)
    self._config = config
    self.register_secret(config.password)

    self._client = Client(
        url=config.url,
        http_get=self.http.get,
        username=config.username,
        password=config.password,
        password_hashed=config.password_hashed,
    )

    metric_queries = [
        queries.CPUMetrics,
        queries.MemoryMetrics,
        queries.SnapshotStatusMetrics,
        queries.CommandLogMetrics,
        queries.ProcedureMetrics,
        queries.LatencyMetrics,
        queries.GCMetrics,
        queries.IOStatsMetrics,
        queries.TableMetrics,
        queries.IndexMetrics,
    ]
    self._query_manager = QueryManager(
        self,
        self._execute_query_raw,
        queries=metric_queries,
        tags=config.tags,
    )
    self.check_initializations.append(self._query_manager.compile_queries)
def __init__(self, name, init_config, instances):
    """Initialise check state and, when the database exists, the metric list.

    Connection and generic errors raised during initialisation are logged and
    swallowed; only ``ConfigurationError`` propagates.

    Raises:
        ConfigurationError: when the database does not exist and
            ``ignore_missing_database`` is not affirmative.
    """
    super(SQLServer, self).__init__(name, init_config, instances)

    self.failed_connections = {}
    self.instance_metrics = []
    self.instance_per_type_metrics = defaultdict(list)
    self.do_check = True
    self.proc = self.instance.get('stored_procedure')
    self.proc_type_mapping = dict(gauge=self.gauge, rate=self.rate, histogram=self.histogram)

    # The query manager starts with an empty query list; it is used to
    # process custom queries.
    self._query_manager = QueryManager(
        self,
        self.execute_query_raw,
        queries=[],
        tags=self.instance.get("tags", []),
    )
    self.check_initializations.append(self._query_manager.compile_queries)

    self.connection = Connection(init_config, self.instance, self.handle_service_check, self.log)

    # Metrics to pre-process into the collection list.
    self.custom_metrics = init_config.get('custom_metrics', [])

    try:
        # Confirm the database exists before opening any connection to it.
        db_exists, context = self.connection.check_database()

        if not db_exists:
            if is_affirmative(self.instance.get("ignore_missing_database", False)):
                # A missing database is expected here: leave checks disabled.
                self.do_check = False
                self.log.warning("Database %s does not exist. Disabling checks for this instance.", context)
            else:
                # A missing database is a configuration problem.
                msg = "Database {} does not exist. Please resolve invalid database and restart agent".format(
                    context
                )
                raise ConfigurationError(msg)
        elif self.instance.get('stored_procedure') is None:
            with self.connection.open_managed_default_connection():
                self._make_metric_list_to_collect(self.custom_metrics)

    # Historically the check does not raise on init failures; that is kept
    # for backwards compatibility, except for ConfigurationError.
    except SQLConnectionError as e:
        self.log.exception("Error connecting to database: %s", e)
    except ConfigurationError:
        raise
    except Exception as e:
        self.log.exception("Initialization exception %s", e)
def set_up_query_manager(self):
    """Build and compile the query manager for the target system.

    Does nothing when ``fetch_system_info`` returns a falsy value.

    Raises:
        ConfigurationError: when a configured query name is neither handled
            specially nor present in the query map.
    """
    system_info = self.fetch_system_info()
    if not system_info:
        return

    known_queries = queries.query_map(self.config)
    supports_7_3 = (system_info.os_version, system_info.os_release) >= (7, 3)

    selected = []
    for query in self.config.queries:
        query_name = query.name

        if query_name == "disk_usage":
            # disk_usage works differently on 7.2 vs 7.3
            if supports_7_3:
                selected.append(queries.get_base_disk_usage_73(self.config.query_timeout))
                selected.append(queries.get_disk_usage(self.config.query_timeout))
            else:
                selected.append(queries.get_base_disk_usage_72(self.config.query_timeout))
            continue

        if query_name == "subsystem":
            # subsystem is only supported on 7.3
            if supports_7_3:
                selected.append(queries.get_subsystem_info(self.config.query_timeout))
            else:
                # Kept non-fatal for backwards compatibility.
                self.log.info("Skipping 'subsystem' query since target system is older than 7.3")
            continue

        if query_name not in known_queries:
            raise ConfigurationError("Unknown or unsupported query name: {}".format(query_name))
        selected.append(known_queries[query_name])

    # A hostname set in the configuration overrides the one reported by the system.
    hostname = self.config.hostname or system_info.hostname

    self._query_manager = QueryManager(
        self,
        self.execute_query,
        tags=self.config.tags,
        queries=selected,
        hostname=hostname,
        error_handler=self.handle_query_error,
    )
    self._query_manager.compile_queries()
def __init__(self, name, init_config, instances):
    """Initialise check state, the custom-query manager and the metric helpers."""
    super(MySql, self).__init__(name, init_config, instances)

    self.qcache_stats = {}
    self.version = None
    self.config = MySQLConfig(self.instance)

    # No connection is held here; a new one is created on every check run.
    self._conn = None

    self._query_manager = QueryManager(
        self,
        self.execute_query_raw,
        queries=[],
        tags=self.config.tags,
    )
    self._statement_metrics = MySQLStatementMetrics(self.config)
    self.check_initializations.append(self._query_manager.compile_queries)

    self.innodb_stats = InnoDBMetrics()
    self.check_initializations.append(self.config.configuration_checks)
def create_query_manager(*args, **kwargs):
    """Build a ``QueryManager`` whose queries are the positional args wrapped in ``Query``.

    ``executor`` and ``check`` may be passed as keyword arguments; a mock
    executor and a bare ``AgentCheck`` are used when they are missing.
    Remaining keyword arguments are forwarded to ``QueryManager``.
    """
    executor = kwargs.pop('executor', None)
    if executor is None:
        executor = mock_executor()

    check = kwargs.pop('check', None)
    if not check:
        check = AgentCheck('test', {}, [{}])

    wrapped_queries = list(map(Query, args))
    return QueryManager(check, executor, wrapped_queries, **kwargs)
def __init__(self, *args, **kwargs):
    """Resolve the configured metric groups into queries and build the query manager.

    When ``aggregate_last_24_hours`` is disabled, the 24-hour window in each
    query is replaced by a "since start of day" window. The replacement is
    applied to per-instance copies of the query definitions so the shared
    ``METRIC_GROUPS`` table is never modified.

    Raises:
        ConfigurationError: when no valid metric group is configured and no
            custom query is defined.
    """
    super(SnowflakeCheck, self).__init__(*args, **kwargs)
    self._config = Config(self.instance)
    self._conn = None

    self.proxy_host = self.init_config.get('proxy_host', None)
    self.proxy_port = self.init_config.get('proxy_port', None)
    self.proxy_user = self.init_config.get('proxy_user', None)
    self.proxy_password = self.init_config.get('proxy_password', None)

    # Add default tags like account to all metrics
    self._tags = self._config.tags + ['account:{}'.format(self._config.account)]

    if self._config.password:
        self.register_secret(self._config.password)

    if self._config.private_key_password:
        self.register_secret(self._config.private_key_password)

    if self._config.role == 'ACCOUNTADMIN':
        self.log.info(
            'Snowflake `role` is set as `ACCOUNTADMIN` which should be used cautiously, '
            'refer to docs about custom roles.'
        )

    self.metric_queries = []
    self.errors = []

    for mgroup in self._config.metric_groups:
        try:
            group_queries = METRIC_GROUPS[mgroup]
            if not self._config.aggregate_last_24_hours:
                # Build modified copies instead of rewriting the module-level
                # definitions in place: mutating METRIC_GROUPS would leak this
                # instance's setting into every other instance.
                group_queries = [
                    dict(
                        query_def,
                        query=query_def['query'].replace(
                            'DATEADD(hour, -24, current_timestamp())', 'date_trunc(day, current_date)'
                        ),
                    )
                    for query_def in group_queries
                ]
        except KeyError:
            self.errors.append(mgroup)
        else:
            self.metric_queries.extend(group_queries)

    if self.errors:
        self.log.warning('Invalid metric_groups found in snowflake conf.yaml: %s', (', '.join(self.errors)))
    if not self.metric_queries and not self._config.custom_queries_defined:
        raise ConfigurationError('No valid metric_groups or custom query configured, please list at least one.')

    self._query_manager = QueryManager(self, self.execute_query_raw, queries=self.metric_queries, tags=self._tags)
    self.check_initializations.append(self._query_manager.compile_queries)
def create_query_manager(*args, **kwargs):
    """Build a ``QueryManager`` over the positional args for use in tests.

    ``executor`` and ``check`` may be passed as keyword arguments; a mock
    executor and a bare ``AgentCheck`` are used when they are missing. The
    check's ``check_id`` is always set to ``'test:instance'``.
    """
    executor = kwargs.pop('executor', None)
    if executor is None:
        executor = mock_executor()

    check = kwargs.pop('check', None)
    if not check:
        check = AgentCheck('test', {}, [{}])
    check.check_id = 'test:instance'

    manager = QueryManager(check, executor, args, **kwargs)
    return manager
def __init__(self, name, init_config, instances):
    """Validate the connection settings and register the metric queries.

    Raises:
        ConfigurationError: when host, port, username or password is missing,
            or when ``additional_metrics`` names an unknown metric group.
    """
    super(ProxysqlCheck, self).__init__(name, init_config, instances)
    self.host = self.instance.get("host", "")
    self.port = int(self.instance.get("port", 0))
    self.user = self.instance.get("username", "")
    self.password = str(self.instance.get("password", ""))

    if not all((self.host, self.port, self.user, self.password)):
        raise ConfigurationError("ProxySQL host, port, username and password are needed")

    self.database_name = self.instance.get("database_name", "stats")
    self.tls_verify = self.instance.get("tls_verify", False)
    self.validate_hostname = self.instance.get("validate_hostname", True)
    self.tls_ca_cert = self.instance.get("tls_ca_cert")
    self.connect_timeout = self.instance.get("connect_timeout", 10)
    self.read_timeout = self.instance.get("read_timeout")

    # Work on a copy: appending to the list stored in the instance would
    # modify the instance configuration itself. `or []` also covers a
    # `tags` key that is present but set to None.
    self.tags = list(self.instance.get("tags") or [])
    self.tags.append("proxysql_server:{}".format(self.host))
    self.tags.append("proxysql_port:{}".format(self.port))

    manager_queries = [STATS_MYSQL_GLOBAL]
    if self.is_metadata_collection_enabled():
        # Add the query to collect the ProxySQL version
        manager_queries.append(VERSION_METADATA)

    additional_metrics = self.instance.get("additional_metrics", [])
    for additional_group in additional_metrics:
        if additional_group not in ADDITIONAL_METRICS_MAPPING:
            raise ConfigurationError(
                "There is no additional metric group called '{}' for the ProxySQL integration, it should be one "
                "of ({})".format(
                    additional_group,
                    ", ".join(ADDITIONAL_METRICS_MAPPING),
                )
            )
        manager_queries.append(ADDITIONAL_METRICS_MAPPING[additional_group])

    self._connection = None
    self._query_manager = QueryManager(self, self.execute_query_raw, queries=manager_queries, tags=self.tags)
    self.check_initializations.append(self._query_manager.compile_queries)
def __init__(self, *args, **kwargs):
    """Read proxy settings, resolve metric groups into queries and build the query manager.

    Raises:
        ConfigurationError: when none of the configured metric groups is known.
    """
    super(SnowflakeCheck, self).__init__(*args, **kwargs)
    self._config = Config(self.instance)
    self._conn = None

    # Proxy settings come from init_config, not from the instance.
    shared = self.init_config
    self.proxy_host = shared.get('proxy_host', None)
    self.proxy_port = shared.get('proxy_port', None)
    self.proxy_user = shared.get('proxy_user', None)
    self.proxy_password = shared.get('proxy_password', None)

    # Every metric carries the account tag on top of the configured tags.
    account_tag = 'account:{}'.format(self._config.account)
    self._tags = self._config.tags + [account_tag]

    if self._config.password:
        self.register_secret(self._config.password)

    if self._config.role == 'ACCOUNTADMIN':
        self.log.info(
            'Snowflake `role` is set as `ACCOUNTADMIN` which should be used cautiously, '
            'refer to docs about custom roles.'
        )

    self.metric_queries = []
    self.errors = []
    for group_name in self._config.metric_groups:
        if group_name in METRIC_GROUPS:
            self.metric_queries.extend(METRIC_GROUPS[group_name])
        else:
            # Unknown groups are collected and reported once, below.
            self.errors.append(group_name)

    if self.errors:
        self.log.warning('Invalid metric_groups found in snowflake conf.yaml: %s', (', '.join(self.errors)))
    if not self.metric_queries:
        raise ConfigurationError('No valid metric_groups configured, please list at least one.')

    self._query_manager = QueryManager(
        self,
        self.execute_query_raw,
        queries=self.metric_queries,
        tags=self._tags,
    )
    self.check_initializations.append(self._query_manager.compile_queries)
def __init__(self, name, init_config, instances):
    # type: (AnyStr, Dict[AnyStr, Any], List[Dict[AnyStr, Any]]) -> None
    """Build the configuration, the query manager and the service-check tags."""
    super(SinglestoreCheck, self).__init__(name, init_config, instances)
    self.config = SingleStoreConfig(self.instance)
    self._connection = cast(pymysql.Connection, None)

    # Default queries always run; system queries are opt-in.
    manager_queries = list(DEFAULT_QUERIES)
    if self.config.collect_system_metrics:
        manager_queries += ADDITIONAL_SYSTEM_QUERIES

    self._query_manager = QueryManager(
        self,
        self.execute_query_raw,
        queries=manager_queries,
        tags=self.config.tags,
    )
    self.check_initializations.append(self._query_manager.compile_queries)

    endpoint_tag = 'singlestore_endpoint:{}:{}'.format(self.config.host, self.config.port)
    self._service_check_tags = [endpoint_tag] + self.config.tags
def __init__(self, name, init_config, instances):
    """Read connection and JDBC truststore settings and build the query manager.

    ``username`` takes precedence over the ``user`` option when both are set.
    """
    super(Oracle, self).__init__(name, init_config, instances)

    settings = self.instance
    self._server = settings.get('server')
    self._user = settings.get('username') or settings.get('user')
    self._password = settings.get('password')
    self._service = settings.get('service_name')
    self._protocol = settings.get("protocol", PROTOCOL_TCP)
    self._jdbc_driver = settings.get('jdbc_driver_path')
    self._jdbc_truststore_path = settings.get('jdbc_truststore_path')
    self._jdbc_truststore_type = settings.get('jdbc_truststore_type')
    self._jdbc_truststore_password = settings.get('jdbc_truststore_password', '')
    self._tags = settings.get('tags') or []

    # Service checks carry the server tag followed by the instance tags.
    server_tag = 'server:{}'.format(self._server)
    self._service_check_tags = [server_tag]
    self._service_check_tags += self._tags

    self._cached_connection = None

    if settings.get('only_custom_queries', False):
        default_queries = []
    else:
        default_queries = [
            queries.ProcessMetrics,
            queries.SystemMetrics,
            queries.TableSpaceMetrics,
        ]

    self._fix_custom_queries()

    self._query_manager = QueryManager(
        self,
        self.execute_query_raw,
        queries=default_queries,
        error_handler=self.handle_query_error,
        tags=self._tags,
    )

    # Runtime validations only exist on py3, so the config is validated
    # manually on py2.
    if PY2:
        self.check_initializations.append(self.validate_config)
    self.check_initializations.append(self._query_manager.compile_queries)

    self._query_errors = 0
    self._connection_errors = 0
def __init__(self, name, init_config, instances):
    """Read the connection settings and register the system-table queries.

    Three initialisation hooks are queued, in order: config validation,
    connection creation and query compilation.
    """
    super(ClickhouseCheck, self).__init__(name, init_config, instances)

    self._server = self.instance.get('server', '')
    self._port = self.instance.get('port')
    self._db = self.instance.get('db', 'default')
    self._user = self.instance.get('user', 'default')
    self._password = self.instance.get('password', '')
    self._connect_timeout = float(self.instance.get('connect_timeout', 10))
    self._read_timeout = float(self.instance.get('read_timeout', 10))
    self._compression = self.instance.get('compression', False)
    self._tls_verify = is_affirmative(self.instance.get('tls_verify', False))

    # Work on a copy: appending to the list stored in the instance would
    # modify the instance configuration itself. `or []` also covers a
    # `tags` key that is present but set to None.
    self._tags = list(self.instance.get('tags') or [])

    # Add global tags
    self._tags.append('server:{}'.format(self._server))
    self._tags.append('port:{}'.format(self._port))
    self._tags.append('db:{}'.format(self._db))

    self._error_sanitizer = ErrorSanitizer(self._password)
    self.check_initializations.append(self.validate_config)

    # We'll connect on the first check run
    self._client = None
    self.check_initializations.append(self.create_connection)

    self._query_manager = QueryManager(
        self,
        self.execute_query_raw,
        queries=[
            queries.SystemMetrics,
            queries.SystemEvents,
            queries.SystemAsynchronousMetrics,
            queries.SystemParts,
            queries.SystemReplicas,
            queries.SystemDictionaries,
        ],
        tags=self._tags,
        error_handler=self._error_sanitizer.clean,
    )
    self.check_initializations.append(self._query_manager.compile_queries)
def __init__(self, name, init_config, instances):
    # type: (str, dict, list) -> None
    """Create the HTTP client and a query manager over the configured queries."""
    super(VoltDBCheck, self).__init__(name, init_config, instances)

    config = Config(cast(Instance, self.instance), debug=self.log.debug)
    self._config = config
    self.register_secret(config.password)

    self._client = Client(
        url=config.url,
        http_get=self.http.get,
        username=config.username,
        password=config.password,
        password_hashed=config.password_hashed,
    )

    manager = QueryManager(
        self,
        self._execute_query_raw,
        queries=config.queries,
        tags=config.tags,
    )
    self._query_manager = manager
    self.check_initializations.append(manager.compile_queries)
def __init__(self, name, init_config, instances):
    # type: (str, dict, list) -> None
    """Create the HTTP client and the query manager.

    When the base package version is below 15.0.0, the query definitions are
    wrapped in ``Query`` objects before being handed to the query manager.
    """
    super(VoltDBCheck, self).__init__(name, init_config, instances)

    config = Config(cast(Instance, self.instance), debug=self.log.debug)
    self._config = config
    self.register_secret(config.password)

    self._client = Client(
        url=config.url,
        http_get=self.http.get,
        username=config.username,
        password=config.password,
        password_hashed=config.password_hashed,
    )

    manager_queries = [
        queries.CPUMetrics,
        queries.MemoryMetrics,
        queries.SnapshotStatusMetrics,
        queries.CommandLogMetrics,
        queries.ProcedureMetrics,
        queries.LatencyMetrics,
        queries.GCMetrics,
        queries.IOStatsMetrics,
        queries.TableMetrics,
        queries.IndexMetrics,
    ]

    needs_query_objects = BASE_PARSED_VERSION < pkg_resources.parse_version('15.0.0')
    if needs_query_objects:
        # On Agent < 7.24.0 `Query` objects must be passed instead of dicts.
        manager_queries = list(map(Query, manager_queries))  # type: ignore

    self._query_manager = QueryManager(
        self,
        self._execute_query_raw,
        queries=manager_queries,
        tags=config.tags,
    )
    self.check_initializations.append(self._query_manager.compile_queries)
def __init__(self, name, init_config, instances):
    """Initialise check state, the custom-query manager and the statement/activity helpers."""
    super(MySql, self).__init__(name, init_config, instances)

    self.qcache_stats = {}
    self.version = None
    self.is_mariadb = None
    self._resolved_hostname = None
    self._agent_hostname = None
    self._is_aurora = None
    self._config = MySQLConfig(self.instance)

    # No connection is held here; a new one is created on every check run.
    self._conn = None

    manager = QueryManager(self, self.execute_query_raw, queries=[])
    self._query_manager = manager
    self.check_initializations.append(manager.compile_queries)

    self.innodb_stats = InnoDBMetrics()
    self.check_initializations.append(self._config.configuration_checks)

    self.performance_schema_enabled = None
    self._warnings_by_code = {}

    # Each helper receives its own freshly built connection-argument dict.
    self._statement_metrics = MySQLStatementMetrics(
        self, self._config, self._get_connection_args()
    )
    self._statement_samples = MySQLStatementSamples(
        self, self._config, self._get_connection_args()
    )
    self._query_activity = MySQLActivity(
        self, self._config, self._get_connection_args()
    )
def test_queries_are_copied(self):
    """Two managers compiled from the same query dict must not share the compiled query object."""

    class MyCheck(AgentCheck):
        pass

    shared_query = {
        'name': 'test query',
        'query': 'foo',
        'columns': [
            {'name': 'test.foo', 'type': 'gauge', 'tags': ['override:ok']},
            {'name': 'test.baz', 'type': 'gauge', 'raw': True},
        ],
        'tags': ['test:bar'],
    }

    first_check = MyCheck('test', {}, [{}])
    second_check = MyCheck('test', {}, [{}])

    first_manager = QueryManager(first_check, mock_executor(), [shared_query])
    second_manager = QueryManager(second_check, mock_executor(), [shared_query])
    first_manager.compile_queries()
    second_manager.compile_queries()

    first_compiled = first_manager.queries[0]
    second_compiled = second_manager.queries[0]
    assert first_compiled is not second_compiled, "QueryManager does not copy the queries"
def test_custom_metrics_multiple_results(aggregator, check):
    """A custom query returning two rows must submit one tagged gauge per row."""
    rows = [["tag_value1", "1"], ["tag_value2", "2"]]

    cursor = mock.MagicMock()
    # A fresh iterator is handed out on every fetchall() call.
    cursor.fetchall.side_effect = lambda: iter(rows)
    con = mock.MagicMock()
    con.cursor.return_value = cursor

    check.instance['custom_queries'] = [
        {
            "metric_prefix": "oracle.test1",
            "query": "mocked",
            "columns": [
                {"name": "tag_name", "type": "tag"},
                {"name": "metric", "type": "gauge"},
            ],
            "tags": ["query_tags1"],
        }
    ]
    check._fix_custom_queries()
    check._connection = con

    query_manager = QueryManager(check, check.execute_query_raw, tags=['custom_tag'])
    query_manager.compile_queries()
    query_manager.execute()

    expectations = (("tag_name:tag_value1", 1), ("tag_name:tag_value2", 2))
    for row_tag, expected_value in expectations:
        aggregator.assert_metric(
            "oracle.test1.metric",
            value=expected_value,
            count=1,
            tags=[row_tag, "query_tags1", "custom_tag"],
        )
class Oracle(AgentCheck):
    """Agent check that runs metric queries against an Oracle database.

    A connection is opened at the start of every ``check`` run, either with
    cx_Oracle or, when its client library is unavailable, through JDBC.
    """

    __NAMESPACE__ = 'oracle'

    # Driver class and connect-string templates used by `create_connection`.
    ORACLE_DRIVER_CLASS = "oracle.jdbc.OracleDriver"
    JDBC_CONNECT_STRING = "jdbc:oracle:thin:@//{}/{}"
    CX_CONNECT_STRING = "{}/{}@//{}/{}"

    SERVICE_CHECK_NAME = 'can_connect'

    def __init__(self, name, init_config, instances):
        """Unpack the instance settings and build the query manager.

        The three built-in metric queries are registered unless the instance
        sets ``only_custom_queries``.
        """
        super(Oracle, self).__init__(name, init_config, instances)
        (
            self._server,
            self._user,
            self._password,
            self._service,
            self._jdbc_driver,
            self._tags,
            only_custom_queries,
        ) = self._get_config(self.instance)

        self.check_initializations.append(self.validate_config)

        # Set by `create_connection` and cleared again by `check`.
        self._connection = None

        manager_queries = []
        if not only_custom_queries:
            manager_queries.extend([
                queries.ProcessMetrics, queries.SystemMetrics,
                queries.TableSpaceMetrics
            ])

        # Rewrites custom query column names in place before the query
        # manager is built.
        self._fix_custom_queries()

        self._query_manager = QueryManager(
            self,
            self.execute_query_raw,
            queries=manager_queries,
            tags=self._tags,
        )
        self.check_initializations.append(self._query_manager.compile_queries)

    def _fix_custom_queries(self):
        """
        For backward compatibility reasons, if a custom query specifies a
        `metric_prefix`, change the submission name to contain it.

        Applies to both instance-level `custom_queries` and init_config-level
        `global_custom_queries`. The column dicts are modified in place.
        """
        custom_queries = self.instance.get('custom_queries', [])
        global_custom_queries = self.init_config.get('global_custom_queries',
                                                     [])
        for query in itertools.chain(custom_queries, global_custom_queries):
            prefix = query.get('metric_prefix')
            if prefix and prefix != self.__NAMESPACE__:
                # Drop a leading "<namespace>." from the prefix.
                if prefix.startswith(self.__NAMESPACE__ + '.'):
                    prefix = prefix[len(self.__NAMESPACE__) + 1:]
                for column in query.get('columns', []):
                    # Columns of type 'tag' keep their name; every other
                    # column gets the prefix prepended.
                    if column.get('type') != 'tag':
                        column['name'] = '{}.{}'.format(prefix, column['name'])

    def validate_config(self):
        """Raise ``ConfigurationError`` when the server or the user is not set."""
        if not self._server or not self._user:
            raise ConfigurationError("Oracle host and user are needed")

    def execute_query_raw(self, query):
        """Run ``query`` on the current connection and return ``cursor.fetchall()``."""
        with closing(self._connection.cursor()) as cursor:
            cursor.execute(query)
            # JDBC doesn't support iter protocol
            return cursor.fetchall()

    def check(self, _):
        """Open a connection, execute all managed queries, then close the connection."""
        self.create_connection()
        with closing(self._connection):
            self._query_manager.execute()
            self._connection = None

    def _get_config(self, instance):
        """Return the settings read from ``instance`` as a 7-tuple.

        Order: server, user, password, service, jdbc_driver, tags,
        only_custom_queries. ``tags`` falls back to an empty list and
        ``only_custom_queries`` to ``False``.
        """
        server = instance.get('server')
        user = instance.get('user')
        password = instance.get('password')
        service = instance.get('service_name')
        jdbc_driver = instance.get('jdbc_driver_path')
        tags = instance.get('tags') or []
        only_custom_queries = instance.get('only_custom_queries', False)
        return server, user, password, service, jdbc_driver, tags, only_custom_queries

    def create_connection(self):
        """Connect to the database and store the connection in ``self._connection``.

        cx_Oracle is used when ``cx_Oracle.clientversion()`` succeeds;
        otherwise the connection goes through JDBC. The ``can_connect``
        service check is submitted as OK on success and CRITICAL on failure;
        on failure the exception is logged and re-raised.
        """
        service_check_tags = ['server:%s' % self._server]
        service_check_tags.extend(self._tags)

        try:
            # Check if the instantclient is available
            cx_Oracle.clientversion()
        except cx_Oracle.DatabaseError as e:
            # Fallback to JDBC
            use_oracle_client = False
            self.log.debug(
                'Oracle instant client unavailable, falling back to JDBC: %s',
                e)
            connect_string = self.JDBC_CONNECT_STRING.format(
                self._server, self._service)
        else:
            use_oracle_client = True
            self.log.debug('Running cx_Oracle version %s', cx_Oracle.version)
            connect_string = self.CX_CONNECT_STRING.format(
                self._user, self._password, self._server, self._service)

        try:
            if use_oracle_client:
                connection = cx_Oracle.connect(connect_string)
            elif JDBC_IMPORT_ERROR:
                # Neither the Oracle client nor the JDBC libraries can be used.
                self.log.error(
                    "Oracle client is unavailable and the integration is unable to import JDBC libraries. You may not "
                    "have the Microsoft Visual C++ Runtime 2015 installed on your system. Please double check your "
                    "installation and refer to the Datadog documentation for more information."
                )
                raise JDBC_IMPORT_ERROR
            else:
                try:
                    # When the JVM is already running but this thread is not
                    # attached to it, attach it and set its context class loader.
                    if jpype.isJVMStarted(
                    ) and not jpype.isThreadAttachedToJVM():
                        jpype.attachThreadToJVM()
                        jpype.java.lang.Thread.currentThread(
                        ).setContextClassLoader(
                            jpype.java.lang.ClassLoader.getSystemClassLoader())
                    connection = jdb.connect(self.ORACLE_DRIVER_CLASS,
                                             connect_string,
                                             [self._user, self._password],
                                             self._jdbc_driver)
                except Exception as e:
                    # A missing driver class gets an extra installation hint;
                    # the exception is re-raised either way.
                    if "Class {} not found".format(
                            self.ORACLE_DRIVER_CLASS) in str(e):
                        msg = """Cannot run the Oracle check until either the Oracle instant client or the JDBC Driver is available. For the Oracle instant client, see: http://www.oracle.com/technetwork/database/features/instant-client/index.html You will also need to ensure the `LD_LIBRARY_PATH` is also updated so the libs are reachable. For the JDBC Driver, see: http://www.oracle.com/technetwork/database/application-development/jdbc/downloads/index.html You will also need to ensure the jar is either listed in your $CLASSPATH or in the yaml configuration file of the check. """
                        self.log.error(msg)
                    raise

            self.log.debug("Connected to Oracle DB")
            self.service_check(self.SERVICE_CHECK_NAME,
                               AgentCheck.OK,
                               tags=service_check_tags)
        except Exception as e:
            self.service_check(self.SERVICE_CHECK_NAME,
                               AgentCheck.CRITICAL,
                               tags=service_check_tags)
            self.log.error(e)
            raise

        self._connection = connection
class SnowflakeCheck(AgentCheck):
    """
    Collect Snowflake account usage metrics
    """

    __NAMESPACE__ = 'snowflake'

    SERVICE_CHECK_CONNECT = 'snowflake.can_connect'

    def __init__(self, *args, **kwargs):
        """Read proxy settings, resolve metric groups into queries and build the query manager.

        Raises:
            ConfigurationError: when none of the configured metric groups is known.
        """
        super(SnowflakeCheck, self).__init__(*args, **kwargs)
        self._config = Config(self.instance)
        self._conn = None

        # Proxy settings come from init_config, not from the instance.
        self.proxy_host = self.init_config.get('proxy_host', None)
        self.proxy_port = self.init_config.get('proxy_port', None)
        self.proxy_user = self.init_config.get('proxy_user', None)
        self.proxy_password = self.init_config.get('proxy_password', None)

        # Add default tags like account to all metrics
        self._tags = self._config.tags + ['account:{}'.format(self._config.account)]

        if self._config.password:
            self.register_secret(self._config.password)

        if self._config.role == 'ACCOUNTADMIN':
            self.log.info(
                'Snowflake `role` is set as `ACCOUNTADMIN` which should be used cautiously, '
                'refer to docs about custom roles.'
            )

        self.metric_queries = []
        self.errors = []
        for mgroup in self._config.metric_groups:
            try:
                self.metric_queries.extend(METRIC_GROUPS[mgroup])
            except KeyError:
                # Unknown groups are collected and reported once, below.
                self.errors.append(mgroup)

        if self.errors:
            self.log.warning('Invalid metric_groups found in snowflake conf.yaml: %s', (', '.join(self.errors)))
        if not self.metric_queries:
            raise ConfigurationError('No valid metric_groups configured, please list at least one.')

        self._query_manager = QueryManager(self, self.execute_query_raw, queries=self.metric_queries, tags=self._tags)
        self.check_initializations.append(self._query_manager.compile_queries)

    def check(self, _):
        """Connect, run all managed queries, collect the version, then close the connection.

        Nothing is executed when no connection is available. The connection is
        closed and forgotten even when a query raises, so a later run whose
        ``connect`` fails cannot reuse a connection that was already closed.
        """
        self.connect()

        if self._conn is None:
            return

        try:
            # Execute queries
            self._query_manager.execute()

            self._collect_version()
        finally:
            self.log.debug("Closing connection to Snowflake...")
            # Drop the reference before closing so the attribute never points
            # at a closed connection, even if close() itself raises.
            conn, self._conn = self._conn, None
            conn.close()

    def execute_query_raw(self, query):
        """
        Execute ``query`` on the current connection and return all fetched rows.

        Returns an empty list when the cursor reports no row count or fewer
        than one row.
        """
        with closing(self._conn.cursor()) as cursor:
            cursor.execute(query)

            if cursor.rowcount is None or cursor.rowcount < 1:
                self.log.debug("Failed to fetch records from query: `%s`", query)
                return []
            return cursor.fetchall()

    def connect(self):
        """Open a new Snowflake connection and store it in ``self._conn``.

        Submits the ``snowflake.can_connect`` service check: OK on success,
        CRITICAL (plus a warning) on failure. A failure does not raise and
        leaves ``self._conn`` untouched.
        """
        self.log.debug(
            "Establishing a new connection to Snowflake: account=%s, user=%s, database=%s, schema=%s, warehouse=%s, "
            "role=%s, timeout=%s, authenticator=%s, ocsp_response_cache_filename=%s, proxy_host=%s, proxy_port=%s",
            self._config.account,
            self._config.user,
            self._config.database,
            self._config.schema,
            self._config.warehouse,
            self._config.role,
            self._config.login_timeout,
            self._config.authenticator,
            self._config.ocsp_response_cache_filename,
            self.proxy_host,
            self.proxy_port,
        )

        try:
            conn = sf.connect(
                user=self._config.user,
                password=self._config.password,
                account=self._config.account,
                database=self._config.database,
                schema=self._config.schema,
                warehouse=self._config.warehouse,
                role=self._config.role,
                passcode_in_password=self._config.passcode_in_password,
                passcode=self._config.passcode,
                client_prefetch_threads=self._config.client_prefetch_threads,
                login_timeout=self._config.login_timeout,
                ocsp_response_cache_filename=self._config.ocsp_response_cache_filename,
                authenticator=self._config.authenticator,
                token=self._config.token,
                client_session_keep_alive=self._config.client_keep_alive,
                proxy_host=self.proxy_host,
                proxy_port=self.proxy_port,
                proxy_user=self.proxy_user,
                proxy_password=self.proxy_password,
            )
        except Exception as e:
            msg = "Unable to connect to Snowflake: {}".format(e)
            self.service_check(self.SERVICE_CHECK_CONNECT, self.CRITICAL, message=msg, tags=self._tags)
            self.warning(msg)
        else:
            self.service_check(self.SERVICE_CHECK_CONNECT, self.OK, tags=self._tags)
            self._conn = conn

    @AgentCheck.metadata_entrypoint
    def _collect_version(self):
        """Query the server version and submit it as ``version`` metadata.

        Errors are logged rather than raised.
        """
        try:
            raw_version = self.execute_query_raw("select current_version();")
            version = raw_version[0][0]
        except Exception as e:
            self.log.error("Error collecting version for Snowflake: %s", e)
        else:
            if version:
                self.set_metadata('version', version)
class MySql(AgentCheck):
    """Agent check that collects MySQL status, variable, InnoDB, replication and custom-query metrics."""

    SERVICE_CHECK_NAME = 'mysql.can_connect'
    SLAVE_SERVICE_CHECK_NAME = 'mysql.replication.slave_running'
    DEFAULT_MAX_CUSTOM_QUERIES = 20

    def __init__(self, name, init_config, instances):
        super(MySql, self).__init__(name, init_config, instances)
        # Query-cache counters from the previous run, keyed by host key (see `_get_host_key`).
        self.qcache_stats = {}
        self.version = None
        self.config = MySQLConfig(self.instance)

        # Create a new connection on every check run
        self._conn = None

        self._query_manager = QueryManager(self, self.execute_query_raw, queries=[], tags=self.config.tags)
        self._statement_metrics = MySQLStatementMetrics(self.config)
        self.check_initializations.append(self._query_manager.compile_queries)
        self.innodb_stats = InnoDBMetrics()
        self.check_initializations.append(self.config.configuration_checks)

    def execute_query_raw(self, query):
        """Run `query` on the current connection and lazily yield its rows (unbuffered cursor)."""
        with closing(self._conn.cursor(pymysql.cursors.SSCursor)) as cursor:
            cursor.execute(query)
            for row in cursor.fetchall_unbuffered():
                yield row

    @AgentCheck.metadata_entrypoint
    def _send_metadata(self):
        """Submit the `version` and `flavor` metadata from `self.version`."""
        self.set_metadata('version', self.version.version + '+' + self.version.build)
        self.set_metadata('flavor', self.version.flavor)

    @classmethod
    def get_library_versions(cls):
        """Return the versions of the client libraries used by this check."""
        return {'pymysql': pymysql.__version__}

    def check(self, _):
        """Run one collection cycle on a fresh connection.

        Any exception is logged and re-raised; the connection reference is
        always cleared afterwards.
        """
        self._set_qcache_stats()
        with self._connect() as db:
            try:
                self._conn = db

                # version collection
                self.version = get_version(db)
                self._send_metadata()

                # Metric collection
                self._collect_metrics(db)
                self._collect_system_metrics(self.config.host, db, self.config.tags)
                if self.config.deep_database_monitoring:
                    self._collect_statement_metrics(db, self.config.tags)

                # keeping track of these:
                self._put_qcache_stats()

                # Custom queries
                self._query_manager.execute()

            except Exception:
                self.log.exception("error!")
                raise
            finally:
                self._conn = None

    def _set_qcache_stats(self):
        """Load the previous run's query-cache counters for this host (None when absent)."""
        host_key = self._get_host_key()
        qcache_st = self.qcache_stats.get(host_key, (None, None, None))

        self._qcache_hits = qcache_st[0]
        self._qcache_inserts = qcache_st[1]
        self._qcache_not_cached = qcache_st[2]

    def _put_qcache_stats(self):
        """Store the current query-cache counters for use by the next run."""
        host_key = self._get_host_key()
        self.qcache_stats[host_key] = (self._qcache_hits, self._qcache_inserts, self._qcache_not_cached)

    def _get_host_key(self):
        """Return the key identifying the server: defaults file, else host[:socket|:port]."""
        if self.config.defaults_file:
            return self.config.defaults_file

        hostkey = self.config.host
        if self.config.mysql_sock:
            hostkey = "{0}:{1}".format(hostkey, self.config.mysql_sock)
        elif self.config.port:
            hostkey = "{0}:{1}".format(hostkey, self.config.port)

        return hostkey

    def _get_connection_args(self):
        """Build the keyword arguments for `pymysql.connect` from the configuration.

        When a defaults file is configured, credentials and host come from that
        file, so only ssl/timeout/charset and the file path are passed.
        """
        ssl = dict(self.config.ssl) if self.config.ssl else None
        connection_args = {
            'ssl': ssl,
            'connect_timeout': self.config.connect_timeout,
        }
        if self.config.charset:
            connection_args['charset'] = self.config.charset

        if self.config.defaults_file != '':
            connection_args['read_default_file'] = self.config.defaults_file
            return connection_args

        connection_args.update({'user': self.config.user, 'passwd': self.config.password})
        if self.config.mysql_sock != '':
            self.service_check_tags = [
                'server:{0}'.format(self.config.mysql_sock),
                'port:unix_socket',
            ] + self.config.tags
            connection_args.update({'unix_socket': self.config.mysql_sock})
        else:
            connection_args.update({'host': self.config.host})

        if self.config.port:
            connection_args.update({'port': self.config.port})
        return connection_args

    @contextmanager
    def _connect(self):
        """Context manager yielding an open connection and reporting `mysql.can_connect`.

        OK is sent once connected. CRITICAL is sent, and the error re-raised, for
        any exception raised while connecting or inside the `with` body. The
        connection is always closed on exit.
        """
        service_check_tags = [
            'server:{0}'.format(self.config.mysql_sock if self.config.mysql_sock != '' else self.config.host),
            'port:{}'.format(self.config.port if self.config.port else 'unix_socket'),
        ] + self.config.tags
        db = None
        try:
            connect_args = self._get_connection_args()
            db = pymysql.connect(**connect_args)
            self.log.debug("Connected to MySQL")
            self.service_check_tags = list(set(service_check_tags))
            self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.OK, tags=service_check_tags)
            yield db
        except Exception:
            self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.CRITICAL, tags=service_check_tags)
            raise
        finally:
            if db:
                db.close()

    def _collect_metrics(self, db):
        """Gather server results, decide which metric sets apply, and submit them."""
        # Get aggregate of all VARS we want to collect.
        # Work on a copy: the mapping is extended/pruned below, and mutating the
        # module-level STATUS_VARS would leak optional metrics into other
        # instances and later check runs.
        metrics = dict(STATUS_VARS)

        # collect results from db
        results = self._get_stats_from_status(db)
        results.update(self._get_stats_from_variables(db))

        innodb_disabled = is_affirmative(self.config.options.get('disable_innodb_metrics', False))
        if not innodb_disabled and self._is_innodb_engine_enabled(db):
            results.update(self.innodb_stats.get_stats_from_innodb_status(db))
            self.innodb_stats.process_innodb_stats(results, self.config.options, metrics)

        # Binary log statistics
        if self._get_variable_enabled(results, 'log_bin'):
            results['Binlog_space_usage_bytes'] = self._get_binary_log_stats(db)

        # Compute key cache utilization metric
        key_blocks_unused = collect_scalar('Key_blocks_unused', results)
        key_cache_block_size = collect_scalar('key_cache_block_size', results)
        key_buffer_size = collect_scalar('key_buffer_size', results)
        results['Key_buffer_size'] = key_buffer_size

        try:
            # can be null if the unit is missing in the user config (4 instead of 4G for eg.)
            if key_buffer_size != 0:
                key_cache_utilization = 1 - ((key_blocks_unused * key_cache_block_size) / key_buffer_size)
                results['Key_cache_utilization'] = key_cache_utilization

            results['Key_buffer_bytes_used'] = collect_scalar('Key_blocks_used', results) * key_cache_block_size
            results['Key_buffer_bytes_unflushed'] = (
                collect_scalar('Key_blocks_not_flushed', results) * key_cache_block_size
            )
        except TypeError as e:
            self.log.error("Not all Key metrics are available, unable to compute: %s", e)

        metrics.update(VARIABLES_VARS)
        metrics.update(INNODB_VARS)
        metrics.update(BINLOG_VARS)

        if is_affirmative(self.config.options.get('extra_status_metrics', False)):
            self.log.debug("Collecting Extra Status Metrics")
            metrics.update(OPTIONAL_STATUS_VARS)

            if self.version.version_compatible((5, 6, 6)):
                metrics.update(OPTIONAL_STATUS_VARS_5_6_6)

        if is_affirmative(self.config.options.get('galera_cluster', False)):
            # already in result-set after 'SHOW STATUS' just add vars to collect
            self.log.debug("Collecting Galera Metrics.")
            metrics.update(GALERA_VARS)

        performance_schema_enabled = self._get_variable_enabled(results, 'performance_schema')
        above_560 = self.version.version_compatible((5, 6, 0))
        if (
            is_affirmative(self.config.options.get('extra_performance_metrics', False))
            and above_560
            and performance_schema_enabled
        ):
            # report avg query response time per schema to Datadog
            results['perf_digest_95th_percentile_avg_us'] = self._get_query_exec_time_95th_us(db)
            results['query_run_time_avg'] = self._query_exec_time_per_schema(db)
            metrics.update(PERFORMANCE_VARS)

        if is_affirmative(self.config.options.get('schema_size_metrics', False)):
            # report the size of each schema to Datadog
            results['information_schema_size'] = self._query_size_per_schema(db)
            metrics.update(SCHEMA_VARS)

        if is_affirmative(self.config.options.get('replication', False)):
            replication_metrics = self._collect_replication_metrics(db, results, above_560)
            metrics.update(replication_metrics)
            self._check_replication_status(results)

        # "synthetic" metrics
        metrics.update(SYNTHETIC_VARS)
        self._compute_synthetic_results(results)

        # remove uncomputed metrics
        for k in SYNTHETIC_VARS:
            if k not in results:
                metrics.pop(k, None)

        # add duped metrics - reporting some as both rate and gauge
        dupes = [
            ('Table_locks_waited', 'Table_locks_waited_rate'),
            ('Table_locks_immediate', 'Table_locks_immediate_rate'),
        ]
        for src, dst in dupes:
            if src in results:
                results[dst] = results[src]

        self._submit_metrics(metrics, results, self.config.tags)

        # Collect custom query metrics
        # Max of 20 queries allowed
        if isinstance(self.config.queries, list):
            for check in self.config.queries[: self.config.max_custom_queries]:
                total_tags = self.config.tags + check.get('tags', [])
                self._collect_dict(
                    check['type'], {check['field']: check['metric']}, check['query'], db, tags=total_tags
                )

            if len(self.config.queries) > self.config.max_custom_queries:
                self.warning(
                    "Maximum number (%s) of custom queries reached. Skipping the rest.",
                    self.config.max_custom_queries,
                )

    def _collect_replication_metrics(self, db, results, above_560):
        """Add replica and connected-slave data to `results`; return the replication metric map."""
        # Get replica stats
        is_mariadb = self.version.flavor == "MariaDB"
        replication_channel = self.config.options.get('replication_channel')
        results.update(self._get_replica_stats(db, is_mariadb, replication_channel))
        nonblocking = is_affirmative(self.config.options.get('replication_non_blocking_status', False))
        results.update(self._get_slave_status(db, above_560, nonblocking))
        return REPLICA_VARS

    def _check_replication_status(self, results):
        """Derive and submit the replication service check (and its deprecated gauge) from `results`."""
        # get slave running form global status page
        slave_running_status = AgentCheck.UNKNOWN
        # Slave_IO_Running: Whether the I/O thread for reading the source's binary log is running.
        # You want this to be Yes unless you have not yet started replication or have explicitly stopped it.
        slave_io_running = collect_type('Slave_IO_Running', results, dict)
        # Slave_SQL_Running: Whether the SQL thread for executing events in the relay log is running.
        slave_sql_running = collect_type('Slave_SQL_Running', results, dict)
        if slave_io_running:
            slave_io_running = any(v.lower().strip() == 'yes' for v in itervalues(slave_io_running))
        if slave_sql_running:
            slave_sql_running = any(v.lower().strip() == 'yes' for v in itervalues(slave_sql_running))
        binlog_running = results.get('Binlog_enabled', False)
        # slaves will only be collected iff user has PROCESS privileges.
        slaves = collect_scalar('Slaves_connected', results)

        if not (slave_io_running is None and slave_sql_running is None):
            if not slave_io_running and not slave_sql_running:
                self.log.debug("Slave_IO_Running and Slave_SQL_Running are not ok")
                slave_running_status = AgentCheck.CRITICAL
            # `elif`, not `if`: "both down" also satisfies "either down", so a second
            # independent test would always downgrade CRITICAL to WARNING.
            elif not slave_io_running or not slave_sql_running:
                self.log.debug("Either Slave_IO_Running or Slave_SQL_Running are not ok")
                slave_running_status = AgentCheck.WARNING

        if slave_running_status == AgentCheck.UNKNOWN:
            if self._is_master(slaves, results):  # master
                if slaves > 0 and binlog_running:
                    self.log.debug("Host is master, there are replicas and binlog is running")
                    slave_running_status = AgentCheck.OK
                else:
                    slave_running_status = AgentCheck.WARNING
            else:  # replica (or standalone)
                if not (slave_io_running is None and slave_sql_running is None):
                    if slave_io_running and slave_sql_running:
                        self.log.debug("Slave_IO_Running and Slave_SQL_Running are ok")
                        slave_running_status = AgentCheck.OK

        # deprecated in favor of service_check("mysql.replication.slave_running")
        self.gauge(
            self.SLAVE_SERVICE_CHECK_NAME,
            1 if slave_running_status == AgentCheck.OK else 0,
            tags=self.config.tags,
        )
        self.service_check(self.SLAVE_SERVICE_CHECK_NAME, slave_running_status, tags=self.service_check_tags)

    def _collect_statement_metrics(self, db, tags):
        """Submit per-statement metrics as counts, tagged with the service-check tags plus `tags`."""
        tags = self.service_check_tags + tags
        metrics = self._statement_metrics.collect_per_statement_metrics(db)
        for metric_name, metric_value, metric_tags in metrics:
            self.count(metric_name, metric_value, tags=list(set(tags + metric_tags)))

    def _is_master(self, slaves, results):
        """Return True when replicas are connected or no `Master_Host` is reported."""
        # master uuid only collected in slaves
        master_host = collect_string('Master_Host', results)
        if slaves > 0 or not master_host:
            return True

        return False

    def _submit_metrics(self, variables, db_results, tags):
        """Submit every non-None value for each variable using its declared metric type."""
        for variable, metric in iteritems(variables):
            metric_name, metric_type = metric
            for tag, value in collect_all_scalars(variable, db_results):
                metric_tags = list(tags)
                if tag:
                    metric_tags.append(tag)
                if value is not None:
                    if metric_type == RATE:
                        self.rate(metric_name, value, tags=metric_tags)
                    elif metric_type == GAUGE:
                        self.gauge(metric_name, value, tags=metric_tags)
                    elif metric_type == COUNT:
                        self.count(metric_name, value, tags=metric_tags)
                    elif metric_type == MONOTONIC:
                        self.monotonic_count(metric_name, value, tags=metric_tags)

    def _collect_dict(self, metric_type, field_metric_map, query, db, tags):
        """
        Query status and get a dictionary back.
        Extract each field out of the dictionary
        and stuff it in the corresponding metric.

        query: show status...
        field_metric_map: {"Seconds_behind_master": "mysqlSecondsBehindMaster"}
        """
        try:
            with closing(db.cursor()) as cursor:
                cursor.execute(query)
                result = cursor.fetchone()
                if result is not None:
                    for field, metric in list(iteritems(field_metric_map)):
                        # Find the column name in the cursor description to identify the column index
                        # http://www.python.org/dev/peps/pep-0249/
                        # cursor.description is a tuple of (column_name, ..., ...)
                        try:
                            col_idx = [d[0].lower() for d in cursor.description].index(field.lower())
                            self.log.debug("Collecting metric: %s", metric)
                            if result[col_idx] is not None:
                                self.log.debug("Collecting done, value %s", result[col_idx])
                                if metric_type == GAUGE:
                                    self.gauge(metric, float(result[col_idx]), tags=tags)
                                elif metric_type == RATE:
                                    self.rate(metric, float(result[col_idx]), tags=tags)
                                else:
                                    # Unknown types fall back to a gauge.
                                    self.gauge(metric, float(result[col_idx]), tags=tags)
                            else:
                                self.log.debug("Received value is None for index %d", col_idx)
                        except ValueError:
                            self.log.exception("Cannot find %s in the columns %s", field, cursor.description)
        except Exception:
            self.warning("Error while running %s\n%s", query, traceback.format_exc())
            self.log.exception("Error while running %s", query)

    def _collect_system_metrics(self, host, db, tags):
        """Submit CPU-time rates for the server process when it appears to run locally."""
        pid = None
        # The server needs to run locally, accessed by TCP or socket
        if host in ["localhost", "127.0.0.1", "0.0.0.0"] or db.port == long(0):
            pid = self._get_server_pid(db)

        if pid:
            self.log.debug("System metrics for mysql w/ pid: %s", pid)
            # At last, get mysql cpu data out of psutil or procfs

            try:
                ucpu, scpu = None, None
                if PSUTIL_AVAILABLE:
                    # Read the times once so user and system values come from the same sample.
                    cpu_times = psutil.Process(pid).cpu_times()
                    ucpu = cpu_times[0]
                    scpu = cpu_times[1]

                if ucpu and scpu:
                    self.rate("mysql.performance.user_time", ucpu, tags=tags)
                    # should really be system_time
                    self.rate("mysql.performance.kernel_time", scpu, tags=tags)
                    self.rate("mysql.performance.cpu_time", ucpu + scpu, tags=tags)

            except Exception:
                self.warning("Error while reading mysql (pid: %s) procfs data\n%s", pid, traceback.format_exc())

    def _get_pid_file_variable(self, db):
        """
        Get the `pid_file` variable
        """
        pid_file = None
        try:
            with closing(db.cursor()) as cursor:
                cursor.execute("SHOW VARIABLES LIKE 'pid_file'")
                pid_file = cursor.fetchone()[1]
        except Exception:
            self.warning("Error while fetching pid_file variable of MySQL.")

        return pid_file

    def _get_server_pid(self, db):
        """Return the server pid from its pid file, else from the process list, else None."""
        pid = None

        # Try to get pid from pid file, it can fail for permission reason
        pid_file = self._get_pid_file_variable(db)
        if pid_file is not None:
            self.log.debug("pid file: %s", str(pid_file))
            try:
                with open(pid_file, 'rb') as f:
                    pid = int(f.readline())
            except (IOError, ValueError):
                # ValueError: the file exists but does not hold an integer; treat it
                # like an unreadable file and fall back to the process list below.
                self.log.debug("Cannot read mysql pid file %s", pid_file)

        # If pid has not been found, read it from ps
        if pid is None and PSUTIL_AVAILABLE:
            for proc in psutil.process_iter():
                try:
                    if proc.name() == PROC_NAME:
                        pid = proc.pid
                except (psutil.AccessDenied, psutil.ZombieProcess, psutil.NoSuchProcess):
                    continue
                except Exception:
                    self.log.exception("Error while fetching mysql pid from psutil")

        return pid

    @classmethod
    def _get_stats_from_status(cls, db):
        """Return the global status rows as a dict."""
        with closing(db.cursor()) as cursor:
            cursor.execute("SHOW /*!50002 GLOBAL */ STATUS;")
            results = dict(cursor.fetchall())

            return results

    @classmethod
    def _get_stats_from_variables(cls, db):
        """Return the global variables rows as a dict."""
        with closing(db.cursor()) as cursor:
            cursor.execute("SHOW GLOBAL VARIABLES;")
            results = dict(cursor.fetchall())

            return results

    def _get_binary_log_stats(self, db):
        """Return the summed size reported by `SHOW BINARY LOGS`, or None on a privileges error."""
        try:
            with closing(db.cursor()) as cursor:
                cursor.execute("SHOW BINARY LOGS;")
                cursor_results = cursor.fetchall()
                # Keyed by log name so that a name listed twice is only counted once.
                master_logs = {result[0]: result[1] for result in cursor_results}

                return sum(itervalues(master_logs))
        except (pymysql.err.InternalError, pymysql.err.OperationalError) as e:
            self.warning("Privileges error accessing the BINARY LOGS (must grant REPLICATION CLIENT): %s", e)
            return None

    def _is_innodb_engine_enabled(self, db):
        """Return True when the engines query returns at least one row, False otherwise or on error."""
        # Whether InnoDB engine is available or not can be found out either
        # from the output of SHOW ENGINES or from information_schema.ENGINES
        # table. Later is choosen because that involves no string parsing.
        try:
            with closing(db.cursor()) as cursor:
                cursor.execute(SQL_INNODB_ENGINES)
                return cursor.rowcount > 0

        except (pymysql.err.InternalError, pymysql.err.OperationalError, pymysql.err.NotSupportedError) as e:
            self.warning("Possibly innodb stats unavailable - error querying engines table: %s", e)
            return False

    def _get_replica_stats(self, db, is_mariadb, replication_channel):
        """Return slave-status values keyed by column then `channel:<name>` tag, plus `Binlog_enabled`."""
        replica_results = defaultdict(dict)
        try:
            with closing(db.cursor(pymysql.cursors.DictCursor)) as cursor:
                if is_mariadb and replication_channel:
                    cursor.execute("SET @@default_master_connection = '{0}';".format(replication_channel))
                    cursor.execute("SHOW SLAVE STATUS;")
                elif replication_channel:
                    cursor.execute("SHOW SLAVE STATUS FOR CHANNEL '{0}';".format(replication_channel))
                else:
                    cursor.execute("SHOW SLAVE STATUS;")

                results = cursor.fetchall()
                self.log.debug("Getting replication status: %s", results)
                for slave_result in results:
                    # MySQL <5.7 does not have Channel_Name.
                    # For MySQL >=5.7 'Channel_Name' is set to an empty string by default
                    channel = replication_channel or slave_result.get('Channel_Name') or 'default'
                    for key, value in iteritems(slave_result):
                        if value is not None:
                            replica_results[key]['channel:{0}'.format(channel)] = value
        except (pymysql.err.InternalError, pymysql.err.OperationalError) as e:
            errno, msg = e.args
            if errno == 1617 and msg == "There is no master connection '{0}'".format(replication_channel):
                # MariaDB complains when you try to get slave status with a
                # connection name on the master, without connection name it
                # responds an empty string as expected.
                # Mysql behaves the same with or without connection name.
                pass
            else:
                self.warning("Privileges error getting replication status (must grant REPLICATION CLIENT): %s", e)

        try:
            with closing(db.cursor(pymysql.cursors.DictCursor)) as cursor:
                cursor.execute("SHOW MASTER STATUS;")
                binlog_results = cursor.fetchone()
                if binlog_results:
                    replica_results.update({'Binlog_enabled': True})
        except (pymysql.err.InternalError, pymysql.err.OperationalError) as e:
            self.warning("Privileges error getting binlog information (must grant REPLICATION CLIENT): %s", e)

        return replica_results

    def _get_slave_status(self, db, above_560, nonblocking):
        """
        Retrieve the slaves' statuses using:
        1. The `performance_schema.threads` table. Non-blocking, requires version > 5.6.0
        2. The `information_schema.processlist` table. Blocking
        """
        try:
            with closing(db.cursor()) as cursor:
                if above_560 and nonblocking:
                    # Query `performance_schema.threads` instead of `
                    # information_schema.processlist` to avoid mutex impact on performance.
                    cursor.execute(SQL_WORKER_THREADS)
                else:
                    cursor.execute(SQL_PROCESS_LIST)
                slave_results = cursor.fetchall()

                # One returned row per connected replica thread.
                return {'Slaves_connected': len(slave_results)}

        except (pymysql.err.InternalError, pymysql.err.OperationalError) as e:
            self.warning("Privileges error accessing the process tables (must grant PROCESS): %s", e)
            return {}

    @classmethod
    def _are_values_numeric(cls, array):
        """Return True when every element of `array` is a digit-only string."""
        return all(v.isdigit() for v in array)

    def _get_variable_enabled(self, results, var):
        """Return a truthy value when `var` is present in `results` and equals 'on' (case-insensitive)."""
        enabled = collect_string(var, results)
        return enabled and enabled.lower().strip() == 'on'

    def _get_query_exec_time_95th_us(self, db):
        """Return the first column of the 95th-percentile query, or None when unavailable."""
        # Fetches the 95th percentile query execution time and returns the value
        # in microseconds
        try:
            with closing(db.cursor()) as cursor:
                cursor.execute(SQL_95TH_PERCENTILE)

                if cursor.rowcount < 1:
                    self.warning(
                        "Failed to fetch records from the perf schema "
                        "'events_statements_summary_by_digest' table."
                    )
                    return None

                row = cursor.fetchone()
                query_exec_time_95th_per = row[0]

                return query_exec_time_95th_per
        except (pymysql.err.InternalError, pymysql.err.OperationalError) as e:
            self.warning("95th percentile performance metrics unavailable at this time: %s", e)
            return None

    def _query_exec_time_per_schema(self, db):
        """Return `{"schema:<name>": avg_us}` for every row, or None when unavailable."""
        # Fetches the avg query execution time per schema and returns the
        # value in microseconds
        try:
            with closing(db.cursor()) as cursor:
                cursor.execute(SQL_AVG_QUERY_RUN_TIME)

                if cursor.rowcount < 1:
                    self.warning(
                        "Failed to fetch records from the perf schema "
                        "'events_statements_summary_by_digest' table."
                    )
                    return None

                schema_query_avg_run_time = {}
                for row in cursor.fetchall():
                    schema_name = str(row[0])
                    avg_us = long(row[1])

                    # set the tag as the dictionary key
                    schema_query_avg_run_time["schema:{0}".format(schema_name)] = avg_us

                return schema_query_avg_run_time
        except (pymysql.err.InternalError, pymysql.err.OperationalError) as e:
            self.warning("Avg exec time performance metrics unavailable at this time: %s", e)
            return None

    def _query_size_per_schema(self, db):
        """Return `{"schema:<name>": size}` for every row.

        Returns None when the query yields no rows and an empty dict on a
        database error.
        """
        # Fetches the size of each schema as reported by SQL_QUERY_SCHEMA_SIZE
        try:
            with closing(db.cursor()) as cursor:
                cursor.execute(SQL_QUERY_SCHEMA_SIZE)

                if cursor.rowcount < 1:
                    self.warning("Failed to fetch records from the information schema 'tables' table.")
                    return None

                schema_size = {}
                for row in cursor.fetchall():
                    schema_name = str(row[0])
                    size = long(row[1])

                    # set the tag as the dictionary key
                    schema_size["schema:{0}".format(schema_name)] = size

                return schema_size
        except (pymysql.err.InternalError, pymysql.err.OperationalError) as e:
            self.warning("Schema size metrics unavailable at this time: %s", e)

        return {}

    def _compute_synthetic_results(self, results):
        """Add query-cache utilization percentages to `results` and refresh the stored counters."""
        if ('Qcache_hits' in results) and ('Qcache_inserts' in results) and ('Qcache_not_cached' in results):
            if not int(results['Qcache_hits']):
                results['Qcache_utilization'] = 0
            else:
                results['Qcache_utilization'] = (
                    float(results['Qcache_hits'])
                    / (int(results['Qcache_inserts']) + int(results['Qcache_not_cached']) + int(results['Qcache_hits']))
                    * 100
                )

            # The "instant" value needs the previous run's counters; skip it on the first run.
            if all(v is not None for v in (self._qcache_hits, self._qcache_inserts, self._qcache_not_cached)):
                if not (int(results['Qcache_hits']) - self._qcache_hits):
                    results['Qcache_instant_utilization'] = 0
                else:
                    top = float(results['Qcache_hits']) - self._qcache_hits
                    bottom = (
                        (int(results['Qcache_inserts']) - self._qcache_inserts)
                        + (int(results['Qcache_not_cached']) - self._qcache_not_cached)
                        + (int(results['Qcache_hits']) - self._qcache_hits)
                    )
                    results['Qcache_instant_utilization'] = (top / bottom) * 100

            # update all three, or none - for consistent samples.
            self._qcache_hits = int(results['Qcache_hits'])
            self._qcache_inserts = int(results['Qcache_inserts'])
            self._qcache_not_cached = int(results['Qcache_not_cached'])
class TeradataCheck(AgentCheck, ConfigMixin):
    """Agent check that runs the configured Teradata queries and reports connect/query health."""

    __NAMESPACE__ = 'teradata'

    def __init__(self, name, init_config, instances):
        super(TeradataCheck, self).__init__(name, init_config, instances)
        self._connect_params = None
        self._connection = None
        self._tags = []
        # Number of failed/empty queries in the current run; drives the query service check.
        self._query_errors = 0
        self._tables_filter = None

        # Copy the defaults so the optional extensions below never touch the shared list.
        manager_queries = deepcopy(DEFAULT_QUERIES)
        if is_affirmative(self.instance.get('collect_res_usage_metrics', False)):
            manager_queries.extend(COLLECT_RES_USAGE)
        if is_affirmative(self.instance.get('collect_table_disk_metrics', False)):
            manager_queries.extend(COLLECT_ALL_SPACE)

        self._query_manager = QueryManager(
            self,
            self._execute_query_raw,
            queries=manager_queries,
            tags=self._tags,
            error_handler=self._executor_error_handler,
        )
        self.check_initializations.append(self.initialize_config)
        self.check_initializations.append(self._query_manager.compile_queries)

    def check(self, _):
        # type: (Any) -> None
        """Run all queries on a fresh connection and submit the health service checks.

        Any exception is reported as a CRITICAL connect service check and re-raised.
        """
        self._query_errors = 0

        try:
            with self.connect() as conn:
                if conn:
                    self._connection = conn
                    self._query_manager.execute()
                    self.submit_health_checks()
        except Exception:
            self.service_check(SERVICE_CHECK_CONNECT, ServiceCheck.CRITICAL, tags=self._tags)
            raise
        finally:
            # `connect()` closes the connection on exit; do not keep a reference to a closed handle.
            self._connection = None

    def initialize_config(self):
        # type: () -> None
        """Build the connection parameters, global tags and table filter from the configuration."""
        self._connect_params = json.dumps(
            {
                'host': self.config.server,
                'account': self.config.account,
                'database': self.config.database,
                'dbs_port': str(self.config.port),
                'logmech': self.config.auth_mechanism,
                'logdata': self.config.auth_data,
                'user': self.config.username,
                'password': self.config.password,
                'https_port': str(self.config.https_port),
                'sslmode': self.config.ssl_mode,
                'sslprotocol': self.config.ssl_protocol,
            }
        )

        global_tags = [
            'teradata_server:{}'.format(self.instance.get('server')),
            'teradata_port:{}'.format(self.instance.get('port', 1025)),
        ]
        self._tags = list(self.config.tags)
        self._tags.extend(global_tags)
        # The query manager was created with the initial empty tag list; point it at the final one.
        self._query_manager.tags = self._tags

        self._tables_filter = create_tables_filter(self.config.tables)

    def _execute_query_raw(self, query):
        # type: (AnyStr) -> Iterable[Sequence]
        """Execute `query` (formatted with the configured database) and yield processed rows.

        A query returning no rows counts as a query error and yields nothing.
        A row that fails processing is logged and yielded unprocessed.
        """
        with closing(self._connection.cursor()) as cursor:
            query = query.format(self.config.database)
            cursor.execute(query)
            if cursor.rowcount < 1:
                self._query_errors += 1
                self.log.warning('Failed to fetch records from query: `%s`.', query)
                return

            rows = cursor.fetchall()
            if not rows:
                return

            # The query name depends only on the query text, so resolve it once
            # instead of re-running the regex for every row.
            query_name = re.search(r'(DBC.[^\s]+)', query).group(1)
            for row in rows:
                try:
                    yield self._queries_processor(row, query_name)
                except Exception as e:
                    self.log.debug(
                        'Unable to process row returned from query "%s", skipping row %s. %s', query_name, row, e
                    )
                    yield row

    def _executor_error_handler(self, error):
        # type: (AnyStr) -> AnyStr
        """Count a query-manager error and hand the error back unchanged."""
        self._query_errors += 1
        return error

    @contextmanager
    def connect(self):
        # type: () -> Iterator[teradatasql.connection]
        """Context manager yielding a Teradata connection that is closed on exit.

        Raises the stored driver import error when the driver is unavailable.
        Any exception raised while connecting or inside the `with` body is
        logged and re-raised.
        """
        conn = None
        if TERADATASQL_IMPORT_ERROR:
            self.log.error(
                'Teradata SQL Driver module is unavailable. Please double check your installation and refer to the '
                'Datadog documentation for more information. %s',
                TERADATASQL_IMPORT_ERROR,
            )
            raise TERADATASQL_IMPORT_ERROR
        self.log.info('Connecting to Teradata database %s on server %s.', self.config.database, self.config.server)
        try:
            conn = teradatasql.connect(self._connect_params)
            self.log.info('Connected to Teradata.')
            yield conn
        except Exception as e:
            self.log.error('Unable to connect to Teradata. %s.', e)
            raise
        finally:
            if conn:
                conn.close()

    def submit_health_checks(self):
        # type: () -> None
        """Submit the query service check (CRITICAL on any query error) and an OK connect check."""
        connect_status = ServiceCheck.OK
        query_status = ServiceCheck.CRITICAL if self._query_errors else ServiceCheck.OK

        self.service_check(SERVICE_CHECK_QUERY, query_status, tags=self._tags)
        self.service_check(SERVICE_CHECK_CONNECT, connect_status, tags=self._tags)

    def _queries_processor(self, row, query_name):
        # type: (Sequence, AnyStr) -> Sequence
        """
        Validate timestamps, filter tables, and normalize empty tags.
        """
        unprocessed_row = row
        # Return database version immediately
        if query_name == 'DBC.DBCInfoV':
            submit_version(self, row)
            return unprocessed_row

        # Only Resource Usage rows include timestamps and also do not include tags.
        if query_name == 'DBC.ResSpmaView':
            processed_row = timestamp_validator(self, unprocessed_row)
            return processed_row

        # Only AllSpaceV rows include table tags
        if (
            query_name == 'DBC.AllSpaceV'
            and is_affirmative(self.config.collect_table_disk_metrics)
            and self._tables_filter
        ):
            tables_filtered_row = filter_tables(self._tables_filter, unprocessed_row)
            if tables_filtered_row:
                processed_row = tags_normalizer(tables_filtered_row, query_name)
                return processed_row
            # Discard row if empty (table is filtered out)
            return tables_filtered_row

        processed_row = tags_normalizer(unprocessed_row, query_name)
        self.log.trace('Row processor returned: %s. \nFrom query: "%s"', processed_row, query_name)
        return processed_row
class SQLServer(AgentCheck):
    """
    Agent check for Microsoft SQL Server.

    On each run the check either calls a user-configured stored procedure
    (`stored_procedure` instance option) or collects the metrics described by
    the metric objects built in `_make_metric_list_to_collect`, followed by any
    queries registered on the `QueryManager`.
    """

    __NAMESPACE__ = 'sqlserver'

    def __init__(self, name, init_config, instances):
        super(SQLServer, self).__init__(name, init_config, instances)

        # Created lazily by `initialize_connection` (a check initialization).
        self.connection = None
        self.failed_connections = {}
        # Flat list of metric objects to report on every run.
        self.instance_metrics = []
        # Metric class name -> names to request in that class' `fetch_all_values`.
        self.instance_per_type_metrics = defaultdict(list)
        # Set to False when the configured database is missing and that is tolerated.
        self.do_check = True

        self.autodiscovery = is_affirmative(self.instance.get('database_autodiscovery'))
        self.autodiscovery_include = self.instance.get('autodiscovery_include', ['.*'])
        self.autodiscovery_exclude = self.instance.get('autodiscovery_exclude', [])
        self._compile_patterns()
        self.autodiscovery_interval = self.instance.get('autodiscovery_interval', DEFAULT_AUTODISCOVERY_INTERVAL)
        self.databases = set()
        self.ad_last_check = 0

        self.proc = self.instance.get('stored_procedure')
        self.proc_type_mapping = {'gauge': self.gauge, 'rate': self.rate, 'histogram': self.histogram}
        self.custom_metrics = init_config.get('custom_metrics', [])

        # use QueryManager to process custom queries
        self._query_manager = QueryManager(self, self.execute_query_raw, queries=[], tags=self.instance.get("tags", []))
        self.check_initializations.append(self.config_checks)
        self.check_initializations.append(self._query_manager.compile_queries)
        self.check_initializations.append(self.initialize_connection)

    def config_checks(self):
        """Log warnings about instance options that contradict each other."""
        if self.autodiscovery and self.instance.get('database'):
            self.log.warning(
                'sqlserver `database_autodiscovery` and `database` options defined in same instance - '
                'autodiscovery will take precedence.'
            )

        # Look at the raw instance options rather than the resolved attributes:
        # `autodiscovery_include` defaults to ['.*'], which is always truthy and
        # would make this warning fire for every instance without autodiscovery.
        explicit_filters = self.instance.get('autodiscovery_include') or self.instance.get('autodiscovery_exclude')
        if not self.autodiscovery and explicit_filters:
            self.log.warning(
                "Autodiscovery is disabled, autodiscovery_include and autodiscovery_exclude will be ignored"
            )

    def initialize_connection(self):
        """
        Create the `Connection` helper and, when the database exists, build the
        initial list of metrics to collect.

        Raises `ConfigurationError` when the database does not exist and
        `ignore_missing_database` is not set. Every other error is logged and
        swallowed so the metric list can be built on a later check run.
        """
        self.connection = Connection(self.init_config, self.instance, self.handle_service_check)

        # Pre-process the list of metrics to collect
        try:
            # check to see if the database exists before we try any connections to it
            db_exists, context = self.connection.check_database()

            if db_exists:
                if self.instance.get('stored_procedure') is None:
                    with self.connection.open_managed_default_connection():
                        with self.connection.get_managed_cursor() as cursor:
                            self.autodiscover_databases(cursor)
                        # `get_sql_type` opens its own managed cursor, so only the
                        # connection has to be open here.
                        self._make_metric_list_to_collect(self.custom_metrics)
            else:
                # How much do we care that the DB doesn't exist?
                ignore = is_affirmative(self.instance.get("ignore_missing_database", False))
                if ignore:
                    # not much : we expect it. leave checks disabled
                    self.do_check = False
                    self.log.warning("Database %s does not exist. Disabling checks for this instance.", context)
                else:
                    # yes we do. Keep trying
                    msg = "Database {} does not exist. Please resolve invalid database and restart agent".format(
                        context
                    )
                    raise ConfigurationError(msg)

        except SQLConnectionError as e:
            self.log.exception("Error connecting to database: %s", e)
        except ConfigurationError:
            raise
        except Exception as e:
            self.log.exception("Initialization exception %s", e)

    def handle_service_check(self, status, host, database, message=None):
        """Submit the connection service check tagged with host, db and the instance tags."""
        custom_tags = self.instance.get("tags", [])
        if custom_tags is None:
            custom_tags = []
        service_check_tags = ['host:{}'.format(host), 'db:{}'.format(database)]
        service_check_tags.extend(custom_tags)
        service_check_tags = list(set(service_check_tags))

        self.service_check(SERVICE_CHECK_NAME, status, tags=service_check_tags, message=message, raw=True)

    def _compile_patterns(self):
        """Compile the autodiscovery include/exclude lists into one regex each."""
        self._include_patterns = self._compile_valid_patterns(self.autodiscovery_include)
        self._exclude_patterns = self._compile_valid_patterns(self.autodiscovery_exclude)

    def _compile_valid_patterns(self, patterns):
        """
        Return a single case-insensitive regex matching any valid entry of `patterns`.

        Empty and uncompilable entries are dropped (the latter with a warning).
        When nothing valid remains, a regex that can never match is returned.
        """
        valid_patterns = []

        for pattern in patterns:
            # Ignore empty patterns as they match everything
            if not pattern:
                continue

            try:
                re.compile(pattern, re.IGNORECASE)
            except Exception:
                self.log.warning('%s is not a valid regular expression and will be ignored', pattern)
            else:
                valid_patterns.append(pattern)

        if valid_patterns:
            return re.compile('|'.join(valid_patterns), re.IGNORECASE)

        # create unmatchable regex - https://stackoverflow.com/a/1845097/2157429
        return re.compile(r'(?!x)x')

    def autodiscover_databases(self, cursor):
        """
        Refresh `self.databases` from the server when the autodiscovery interval elapsed.

        Returns True only when the filtered set of databases changed, which
        tells the caller to rebuild the metric list.
        """
        if not self.autodiscovery:
            return False

        now = time.time()
        if now - self.ad_last_check > self.autodiscovery_interval:
            self.log.info('Performing database autodiscovery')
            cursor.execute(AUTODISCOVERY_QUERY)
            all_dbs = {row.name for row in cursor.fetchall()}
            excluded_dbs = {d for d in all_dbs if self._exclude_patterns.match(d)}
            included_dbs = {d for d in all_dbs if self._include_patterns.match(d)}

            self.log.debug(
                'Autodiscovered databases: %s, excluding: %s, including: %s', all_dbs, excluded_dbs, included_dbs
            )

            # keep included dbs but remove any that were explicitly excluded
            filtered_dbs = all_dbs.intersection(included_dbs) - excluded_dbs

            self.log.debug('Resulting filtered databases: %s', filtered_dbs)
            self.ad_last_check = now

            if filtered_dbs != self.databases:
                self.log.debug('Databases updated from previous autodiscovery check.')
                self.databases = filtered_dbs
                return True
        return False

    def _make_metric_list_to_collect(self, custom_metrics):
        """
        Build the list of metric objects to collect and the per-class grouping
        of metric names used by `collect_metrics`.

        Both `self.instance_metrics` and `self.instance_per_type_metrics` are
        rebuilt from scratch on every call, so the method can safely be called
        again after autodiscovery changes the set of databases.
        """
        metrics_to_collect = []
        # `tags: null` in the instance config yields None; normalize it like
        # `handle_service_check` does so list concatenation below cannot fail.
        tags = self.instance.get('tags') or []

        # Load instance-level (previously Performance) metrics)
        # If several check instances are querying the same server host, it can be wise to turn these off
        # to avoid sending duplicate metrics
        if is_affirmative(self.instance.get('include_instance_metrics', True)):
            self._add_performance_counters(
                chain(INSTANCE_METRICS, INSTANCE_METRICS_TOTAL), metrics_to_collect, tags, db=None
            )

        # populated through autodiscovery
        if self.databases:
            for db in self.databases:
                self._add_performance_counters(INSTANCE_METRICS_TOTAL, metrics_to_collect, tags, db=db)

        # Load database statistics
        for name, table, column in DATABASE_METRICS:
            # include database as a filter option
            db_names = self.databases or [self.instance.get('database', self.connection.DEFAULT_DATABASE)]
            for db_name in db_names:
                cfg = {'name': name, 'table': table, 'column': column, 'instance_name': db_name, 'tags': tags}
                metrics_to_collect.append(self.typed_metric(cfg_inst=cfg, table=table, column=column))

        # Load AlwaysOn metrics
        if is_affirmative(self.instance.get('include_ao_metrics', False)):
            for name, table, column in AO_METRICS + AO_METRICS_PRIMARY + AO_METRICS_SECONDARY:
                db_name = 'master'
                cfg = {
                    'name': name,
                    'table': table,
                    'column': column,
                    'instance_name': db_name,
                    'tags': tags,
                    'ao_database': self.instance.get('ao_database', None),
                    'availability_group': self.instance.get('availability_group', None),
                    'only_emit_local': is_affirmative(self.instance.get('only_emit_local', False)),
                }
                metrics_to_collect.append(self.typed_metric(cfg_inst=cfg, table=table, column=column))

        # Load FCI metrics
        if is_affirmative(self.instance.get('include_fci_metrics', False)):
            for name, table, column in FCI_METRICS:
                cfg = {
                    'name': name,
                    'table': table,
                    'column': column,
                    'tags': tags,
                }
                metrics_to_collect.append(self.typed_metric(cfg_inst=cfg, table=table, column=column))

        # Load metrics from scheduler and task tables, if enabled
        if is_affirmative(self.instance.get('include_task_scheduler_metrics', False)):
            for name, table, column in TASK_SCHEDULER_METRICS:
                cfg = {'name': name, 'table': table, 'column': column, 'tags': tags}
                metrics_to_collect.append(self.typed_metric(cfg_inst=cfg, table=table, column=column))

        # Load DB Fragmentation metrics
        if is_affirmative(self.instance.get('include_db_fragmentation_metrics', False)):
            db_fragmentation_object_names = self.instance.get('db_fragmentation_object_names', [])
            db_names = self.databases or [self.instance.get('database', self.connection.DEFAULT_DATABASE)]

            if not db_fragmentation_object_names:
                self.log.debug(
                    "No fragmentation object names specified, will return fragmentation metrics for all "
                    "object_ids of current database(s): %s",
                    db_names,
                )

            for db_name in db_names:
                for name, table, column in DATABASE_FRAGMENTATION_METRICS:
                    cfg = {
                        'name': name,
                        'table': table,
                        'column': column,
                        'instance_name': db_name,
                        'tags': tags,
                        'db_fragmentation_object_names': db_fragmentation_object_names,
                    }
                    metrics_to_collect.append(self.typed_metric(cfg_inst=cfg, table=table, column=column))

        # Load any custom metrics from conf.d/sqlserver.yaml
        for raw_cfg in custom_metrics:
            sql_type = None
            base_name = None

            # Work on a shallow copy: writing the merged tags back into the
            # shared `custom_metrics` entry would prepend the instance tags
            # again on every rebuild of the metric list.
            cfg = dict(raw_cfg)
            cfg['tags'] = tags + (raw_cfg.get('tags') or [])

            db_table = cfg.get('table', DEFAULT_PERFORMANCE_TABLE)
            if db_table not in VALID_TABLES:
                self.log.error('%s has an invalid table name: %s', cfg['name'], db_table)
                continue

            if cfg.get('database', None) and cfg.get('database') != self.instance.get('database'):
                self.log.debug(
                    'Skipping custom metric %s for database %s, check instance configured for database %s',
                    cfg['name'],
                    cfg.get('database'),
                    self.instance.get('database'),
                )
                continue

            if db_table == DEFAULT_PERFORMANCE_TABLE:
                user_type = cfg.get('type')
                if user_type is not None and user_type not in VALID_METRIC_TYPES:
                    # NOTE(review): the metric is still created after this error is
                    # logged; confirm whether it should be skipped instead.
                    self.log.error('%s has an invalid metric type: %s', cfg['name'], user_type)
                sql_type = None
                try:
                    # The counter type is only looked up when the user did not force one.
                    if user_type is None:
                        sql_type, base_name = self.get_sql_type(cfg['counter_name'])
                except Exception:
                    self.log.warning("Can't load the metric %s, ignoring", cfg['name'], exc_info=True)
                    continue

                metrics_to_collect.append(
                    self.typed_metric(
                        cfg_inst=cfg, table=db_table, base_name=base_name, user_type=user_type, sql_type=sql_type
                    )
                )

            else:
                for column in cfg['columns']:
                    metrics_to_collect.append(
                        self.typed_metric(
                            cfg_inst=cfg, table=db_table, base_name=base_name, sql_type=sql_type, column=column
                        )
                    )

        self.instance_metrics = metrics_to_collect
        self.log.debug("metrics to collect %s", metrics_to_collect)

        # create an organized grouping of metric names to their metric classes
        # Start from an empty mapping so names from a previous build (for example
        # databases that autodiscovery no longer returns) are not queried again.
        per_type_metrics = defaultdict(list)
        for m in metrics_to_collect:
            cls = m.__class__.__name__
            name = m.sql_name or m.column
            self.log.debug("Adding metric class %s named %s", cls, name)

            per_type_metrics[cls].append(name)
            if m.base_name:
                per_type_metrics[cls].append(m.base_name)
        self.instance_per_type_metrics = per_type_metrics

    def _add_performance_counters(self, metrics, metrics_to_collect, tags, db=None):
        """
        Append one performance-counter metric object per entry of `metrics`
        to `metrics_to_collect`.

        `db`, when given, overrides the instance name of every counter.
        Connection errors propagate; any other failure skips that counter.
        """
        for name, counter_name, instance_name in metrics:
            try:
                sql_type, base_name = self.get_sql_type(counter_name)
                cfg = {
                    'name': name,
                    'counter_name': counter_name,
                    'instance_name': db or instance_name,
                    'tags': tags,
                }

                metrics_to_collect.append(
                    self.typed_metric(
                        cfg_inst=cfg, table=DEFAULT_PERFORMANCE_TABLE, base_name=base_name, sql_type=sql_type
                    )
                )
            except SQLConnectionError:
                raise
            except Exception:
                self.log.warning("Can't load the metric %s, ignoring", name, exc_info=True)
                continue

    def get_sql_type(self, counter_name):
        """
        Return the type of the performance counter so that we can report it to
        Datadog correctly.

        If the sql_type is one that needs a base (PERF_RAW_LARGE_FRACTION and
        PERF_AVERAGE_BULK), the name of the base counter will also be returned.

        Returns a `(sql_type, base_name)` tuple; `base_name` is None when no
        base counter is needed or none could be found.
        """
        with self.connection.get_managed_cursor() as cursor:
            cursor.execute(COUNTER_TYPE_QUERY, (counter_name,))
            (sql_type,) = cursor.fetchone()
            if sql_type == PERF_LARGE_RAW_BASE:
                self.log.warning("Metric %s is of type Base and shouldn't be reported this way", counter_name)
            base_name = None
            if sql_type in [PERF_AVERAGE_BULK, PERF_RAW_LARGE_FRACTION]:
                # This is an ugly hack. For certains type of metric (PERF_RAW_LARGE_FRACTION
                # and PERF_AVERAGE_BULK), we need two metrics: the metrics specified and
                # a base metrics to get the ratio. There is no unique schema so we generate
                # the possible candidates and we look at which ones exist in the db.
                candidates = (
                    counter_name + " base",
                    counter_name.replace("(ms)", "base"),
                    counter_name.replace("Avg ", "") + " base",
                )
                try:
                    cursor.execute(BASE_NAME_QUERY, candidates)
                    base_name = cursor.fetchone().counter_name.strip()
                    self.log.debug("Got base metric: %s for metric: %s", base_name, counter_name)
                except Exception as e:
                    self.log.warning("Could not get counter_name of base for metric: %s", e)

        return sql_type, base_name

    def typed_metric(self, cfg_inst, table, base_name=None, user_type=None, sql_type=None, column=None):
        """
        Create the appropriate BaseSqlServerMetric object, each implementing its method to
        fetch the metrics properly.
        If a `type` was specified in the config, it is used to report the value
        directly fetched from SQLServer. Otherwise, it is decided based on the
        sql_type, according to microsoft's documentation.
        """
        if table == DEFAULT_PERFORMANCE_TABLE:
            metric_type_mapping = {
                PERF_COUNTER_BULK_COUNT: (self.rate, metrics.SqlSimpleMetric),
                PERF_COUNTER_LARGE_RAWCOUNT: (self.gauge, metrics.SqlSimpleMetric),
                PERF_LARGE_RAW_BASE: (self.gauge, metrics.SqlSimpleMetric),
                PERF_RAW_LARGE_FRACTION: (self.gauge, metrics.SqlFractionMetric),
                PERF_AVERAGE_BULK: (self.gauge, metrics.SqlIncrFractionMetric),
            }
            if user_type is not None:
                # user type overrides any other value
                metric_type = getattr(self, user_type)
                cls = metrics.SqlSimpleMetric
            else:
                metric_type, cls = metric_type_mapping[sql_type]
        else:
            # Lookup metrics classes by their associated table
            metric_type_str, cls = metrics.TABLE_MAPPING[table]
            metric_type = getattr(self, metric_type_str)

        return cls(cfg_inst, base_name, metric_type, column, self.log)

    def check(self, _):
        """Run the stored-procedure check or the regular metric collection."""
        if self.do_check:
            if self.proc:
                self.do_stored_procedure_check()
            else:
                self.collect_metrics()
        else:
            self.log.debug("Skipping check")

    def collect_metrics(self):
        """Fetch the metrics from all of the associated database tables."""
        with self.connection.open_managed_default_connection():
            with self.connection.get_managed_cursor() as cursor:
                # initiate autodiscovery or if the server was down at check __init__ key could be missing.
                if self.autodiscover_databases(cursor) or not self.instance_metrics:
                    self._make_metric_list_to_collect(self.custom_metrics)

                instance_results = {}

                # Execute the `fetch_all` operations first to minimize the database calls
                for cls, metric_names in six.iteritems(self.instance_per_type_metrics):
                    if not metric_names:
                        instance_results[cls] = None, None
                    else:
                        try:
                            rows, cols = getattr(metrics, cls).fetch_all_values(cursor, metric_names, self.log)
                        except Exception as e:
                            self.log.error("Error running `fetch_all` for metrics %s - skipping. Error: %s", cls, e)
                            rows, cols = None, None

                        instance_results[cls] = rows, cols

                # Using the cached data, extract and report individual metrics
                for metric in self.instance_metrics:
                    if type(metric) is metrics.SqlIncrFractionMetric:
                        # special case, since it uses the same results as SqlFractionMetric
                        key = 'SqlFractionMetric'
                    else:
                        key = metric.__class__.__name__

                    # The results may hold no entry for `key` (e.g. only
                    # incremental-fraction counters are configured, so no
                    # 'SqlFractionMetric' batch was fetched); skip instead of
                    # failing the whole run with a KeyError.
                    if key not in instance_results:
                        self.log.debug("No %s results were fetched, skipping metric", key)
                        continue

                    rows, cols = instance_results[key]
                    if rows is not None:
                        metric.fetch_metric(rows, cols)

            # reuse connection for any custom queries
            self._query_manager.execute()

    def execute_query_raw(self, query):
        """Executor handed to the QueryManager: run `query` and return all rows."""
        with self.connection.get_managed_cursor() as cursor:
            cursor.execute(query)
            return cursor.fetchall()

    def do_stored_procedure_check(self):
        """
        Fetch the metrics from the stored proc.

        Each returned row is read through its `metric`, `type`, `value` and
        `tags` attributes. The procedure is skipped when the `proc_only_if`
        guard query is configured and does not return 1. Errors raised while
        calling the procedure are logged and re-raised.
        """
        proc = self.proc
        guard_sql = self.instance.get('proc_only_if')
        custom_tags = self.instance.get("tags") or []

        if guard_sql and not self.proc_check_guard(guard_sql):
            self.log.info("Skipping call to %s due to only_if", proc)
            return

        db_key = self.connection.DEFAULT_DB_KEY
        self.connection.open_db_connections(db_key)
        cursor = self.connection.get_cursor(db_key)

        try:
            self.log.debug("Calling Stored Procedure : %s", proc)
            if self.connection.get_connector() == 'adodbapi':
                cursor.callproc(proc)
            else:
                # pyodbc does not support callproc; use execute instead.
                # Reference: https://github.com/mkleehammer/pyodbc/wiki/Calling-Stored-Procedures
                call_proc = '{{CALL {}}}'.format(proc)
                cursor.execute(call_proc)

            rows = cursor.fetchall()
            self.log.debug("Row count (%s) : %s", proc, cursor.rowcount)

            for row in rows:
                tags = [] if row.tags is None or row.tags == '' else row.tags.split(',')
                tags.extend(custom_tags)

                # Look the submission method up with the lower-cased type: the
                # mapping keys are lower case, so indexing with the raw value
                # raised KeyError for e.g. "Gauge".
                submit = self.proc_type_mapping.get(row.type.lower())
                if submit is not None:
                    submit(row.metric, row.value, tags, raw=True)
                else:
                    self.log.warning(
                        '%s is not a recognised type from procedure %s, metric %s', row.type, proc, row.metric
                    )

        except Exception as e:
            self.log.warning("Could not call procedure %s: %s", proc, e)
            raise
        finally:
            # Release the cursor and the connection on failure too.
            self.connection.close_cursor(cursor)
            self.connection.close_db_connections(db_key)

    def proc_check_guard(self, sql):
        """
        check to see if the guard SQL returns a single column containing 0 or 1
        We return true if 1, else False
        """
        self.connection.open_db_connections(self.connection.PROC_GUARD_DB_KEY)
        cursor = self.connection.get_cursor(self.connection.PROC_GUARD_DB_KEY)

        should_run = False
        try:
            cursor.execute(sql, ())
            result = cursor.fetchone()
            should_run = result[0] == 1
        except Exception as e:
            # Any failure (including an empty result) means "do not run".
            self.log.error("Failed to run proc_only_if sql %s : %s", sql, e)

        self.connection.close_cursor(cursor)
        self.connection.close_db_connections(self.connection.PROC_GUARD_DB_KEY)
        return should_run
class RethinkDBCheck(AgentCheck):
    """
    Collect metrics from a RethinkDB cluster.

    Every query is declared through the `QueryManager`; a connection is opened
    for the duration of each check run and the `can_connect` service check
    reports whether the run succeeded.
    """

    __NAMESPACE__ = 'rethinkdb'
    SERVICE_CHECK_CONNECT = 'can_connect'

    def __init__(self, *args, **kwargs):
        # type: (*Any, **Any) -> None
        super(RethinkDBCheck, self).__init__(*args, **kwargs)

        self._config = Config(cast(Instance, self.instance))

        password = self._config.password
        if password:
            self.register_secret(password)

        # Only set while `connect_submitting_service_checks` is active.
        self._conn = None  # type: Optional[rethinkdb.net.Connection]

        manager_queries = [
            queries.ClusterMetrics,
            queries.ServerMetrics,
            queries.DatabaseConfigMetrics,
            queries.DatabaseTableMetrics,
            queries.TableConfigMetrics,
            queries.ReplicaMetrics,
            queries.ShardMetrics,
            queries.JobMetrics,
            queries.CurrentIssuesMetrics,
        ]  # type: list

        # NOTE(review): this tests the attribute itself rather than calling it;
        # confirm that `is_metadata_collection_enabled` is not a plain method
        # on the base class (a bound method is always truthy).
        if self.is_metadata_collection_enabled:
            manager_queries.append(queries.VersionMetadata)

        self._query_manager = QueryManager(
            self,
            executor=self._execute_raw_query,
            queries=manager_queries,
            tags=self._config.tags,
        )
        # Import path ("module:function") -> resolved query function.
        self._query_funcs = {}  # type: Dict[str, Callable]

        self.check_initializations.append(self._query_manager.compile_queries)

    def _execute_raw_query(self, query):
        # type: (str) -> List[tuple]
        """Resolve `query` to a query function and call it with the current connection."""
        resolved = self._query_funcs.get(query)

        if resolved is None:
            # QueryManager only supports `str` queries.
            # So here's the workaround: we make `query` refer to the import paths of query functions, then import here.
            # Cache the results so imports only happen on the first check run.
            module_path, _, attribute = query.partition(':')
            resolved = getattr(
                importlib.import_module(module_path, package='datadog_checks.rethinkdb'),
                attribute,
            )
            self._query_funcs[query] = resolved

        return resolved(self._conn)

    @contextmanager
    def connect_submitting_service_checks(self):
        # type: () -> Iterator[None]
        """
        Open a connection for the duration of the `with` block.

        Submits the `can_connect` service check as OK when the block completes,
        or as CRITICAL (then re-raises) when connecting or the block fails.
        `self._conn` is always reset to None on exit.
        """
        cfg = self._config
        check_tags = cfg.service_check_tags

        try:
            connection = rethinkdb.r.connect(
                host=cfg.host,
                port=cfg.port,
                user=cfg.user,
                password=cfg.password,
                ssl={} if cfg.tls_ca_cert is None else {'ca_certs': cfg.tls_ca_cert},
            )
            with connection as conn:
                self._conn = conn
                yield
        except rethinkdb.errors.ReqlDriverError as exc:
            message = 'Could not connect to RethinkDB server: {!r}'.format(exc)
            self.log.error(message)
            self.service_check(self.SERVICE_CHECK_CONNECT, self.CRITICAL, tags=check_tags, message=message)
            raise
        except Exception as exc:
            message = 'Unexpected error while executing RethinkDB check: {!r}'.format(exc)
            self.log.error(message)
            self.service_check(self.SERVICE_CHECK_CONNECT, self.CRITICAL, tags=check_tags, message=message)
            raise
        else:
            self.service_check(self.SERVICE_CHECK_CONNECT, self.OK, tags=check_tags)
        finally:
            self._conn = None

    def collect_metrics(self):
        # Exposed for mocking purposes.
        # type: () -> None
        self._query_manager.execute()

    def check(self, instance):
        # type: (Any) -> None
        """Run every declared query inside a service-check-reporting connection."""
        with self.connect_submitting_service_checks():
            self.collect_metrics()
class ClickhouseCheck(AgentCheck):
    """
    Agent check for ClickHouse.

    The client connection is created once, as a check initialization; every
    run then executes the declared system-table queries and submits the
    server version as metadata.
    """

    __NAMESPACE__ = 'clickhouse'
    SERVICE_CHECK_CONNECT = 'can_connect'

    def __init__(self, name, init_config, instances):
        super(ClickhouseCheck, self).__init__(name, init_config, instances)

        self._server = self.instance.get('server', '')
        self._port = self.instance.get('port')
        self._db = self.instance.get('db', 'default')
        self._user = self.instance.get('user', 'default')
        self._password = self.instance.get('password', '')
        self._connect_timeout = float(self.instance.get('connect_timeout', 10))
        self._read_timeout = float(self.instance.get('read_timeout', 10))
        self._compression = self.instance.get('compression', False)
        self._tls_verify = is_affirmative(self.instance.get('tls_verify', False))

        # Copy the configured tags: appending the global tags to the list
        # object stored in the instance config would modify that config in
        # place. `or []` also covers an explicit `tags: null`.
        self._tags = list(self.instance.get('tags') or [])

        # Add global tags
        self._tags.append('server:{}'.format(self._server))
        self._tags.append('port:{}'.format(self._port))
        self._tags.append('db:{}'.format(self._db))

        self._error_sanitizer = ErrorSanitizer(self._password)
        self.check_initializations.append(self.validate_config)

        # We'll connect on the first check run
        self._client = None
        self.check_initializations.append(self.create_connection)

        self._query_manager = QueryManager(
            self,
            self.execute_query_raw,
            queries=[
                queries.SystemMetrics,
                queries.SystemEvents,
                queries.SystemAsynchronousMetrics,
                queries.SystemParts,
                queries.SystemReplicas,
                queries.SystemDictionaries,
            ],
            tags=self._tags,
            error_handler=self._error_sanitizer.clean,
        )
        self.check_initializations.append(self._query_manager.compile_queries)

    def check(self, _):
        """Execute the declared queries, then submit the version metadata."""
        self._query_manager.execute()
        self.collect_version()

    def collect_version(self):
        """Query the server version and submit it as `version` metadata."""
        version = list(self.execute_query_raw('SELECT version()'))[0][0]

        # The version comes in like `19.15.2.2` though sometimes there is no patch part
        # (`zip` stops at the shorter input, so a missing part is simply omitted).
        version_parts = dict(zip(('year', 'major', 'minor', 'patch'), version.split('.')))

        self.set_metadata('version', version, scheme='parts', final_scheme='calver', part_map=version_parts)

    def execute_query_raw(self, query):
        """Executor handed to the QueryManager: iterate over the rows of `query`."""
        return self._client.execute_iter(query)

    def validate_config(self):
        """Raise `ConfigurationError` when the required `server` option is missing."""
        if not self._server:
            raise ConfigurationError('the `server` setting is required')

    def create_connection(self):
        """
        Create the client, connect, and submit the `can_connect` service check.

        On failure the service check is CRITICAL and an exception of the same
        type is raised with a sanitized message.
        """
        try:
            client = clickhouse_driver.Client(
                host=self._server,
                port=self._port,
                user=self._user,
                password=self._password,
                database=self._db,
                connect_timeout=self._connect_timeout,
                send_receive_timeout=self._read_timeout,
                sync_request_timeout=self._connect_timeout,
                compression=self._compression,
                secure=self._tls_verify,
                # Don't pollute the Agent logs
                settings={'calculate_text_stack_trace': False},
                # Make every client unique for server logs
                client_name='datadog-{}'.format(self.check_id),
            )
            client.connection.connect()
        except Exception as e:
            error = 'Unable to connect to ClickHouse: {}'.format(
                self._error_sanitizer.clean(self._error_sanitizer.scrub(str(e)))
            )
            self.service_check(self.SERVICE_CHECK_CONNECT, self.CRITICAL, message=error, tags=self._tags)

            # When an exception is raised in the context of another one, both will be printed. To avoid
            # this we set the context to None. https://www.python.org/dev/peps/pep-0409/
            raise_from(type(e)(error), None)
        else:
            self.service_check(self.SERVICE_CHECK_CONNECT, self.OK, tags=self._tags)
            self._client = client