Example #1
    def __init__(self, name, init_config, instances):
        super(Oracle, self).__init__(name, init_config, instances)
        self._server = self.instance.get('server')
        self._user = self.instance.get('user')
        self._password = self.instance.get('password')
        self._service = self.instance.get('service_name')
        self._jdbc_driver = self.instance.get('jdbc_driver_path')
        self._tags = self.instance.get('tags') or []
        self._service_check_tags = ['server:{}'.format(self._server)]
        self._service_check_tags.extend(self._tags)

        self._cached_connection = None

        manager_queries = []
        if not self.instance.get('only_custom_queries', False):
            manager_queries.extend([
                queries.ProcessMetrics, queries.SystemMetrics,
                queries.TableSpaceMetrics
            ])

        self._fix_custom_queries()

        self._query_manager = QueryManager(
            self,
            self.execute_query_raw,
            queries=manager_queries,
            error_handler=self.handle_query_error,
            tags=self._tags,
        )

        self.check_initializations.append(self.validate_config)
        self.check_initializations.append(self._query_manager.compile_queries)

        self._current_errors = 0
Example #2
    def __init__(self, *args, **kwargs):
        # type: (*Any, **Any) -> None
        super(RethinkDBCheck, self).__init__(*args, **kwargs)

        self._config = Config(cast(Instance, self.instance))

        if self._config.password:
            self.register_secret(self._config.password)

        self._conn = None  # type: Optional[rethinkdb.net.Connection]

        manager_queries = [
            queries.ClusterMetrics,
            queries.ServerMetrics,
            queries.DatabaseConfigMetrics,
            queries.DatabaseTableMetrics,
            queries.TableConfigMetrics,
            queries.ReplicaMetrics,
            queries.ShardMetrics,
            queries.JobMetrics,
            queries.CurrentIssuesMetrics,
        ]  # type: list

        if self.is_metadata_collection_enabled:
            manager_queries.append(queries.VersionMetadata)

        self._query_manager = QueryManager(
            self,
            executor=self._execute_raw_query,
            queries=manager_queries,
            tags=self._config.tags,
        )
        self._query_funcs = {}  # type: Dict[str, Callable]

        self.check_initializations.append(self._query_manager.compile_queries)
Example #3
    def __init__(self, *args, **kwargs):
        super(SnowflakeCheck, self).__init__(*args, **kwargs)
        self.config = Config(self.instance)
        self._conn = None

        # Add default tags like account to all metrics
        self._tags = self.config.tags + ['account:{}'.format(self.config.account)]

        if self.config.password:
            self.register_secret(self.config.password)

        self.metric_queries = []
        self.errors = []
        for mgroup in self.config.metric_groups:
            try:
                self.metric_queries.extend(METRIC_GROUPS[mgroup])
            except KeyError:
                self.errors.append(mgroup)

        if self.errors:
            self.log.warning('Invalid metric_groups found in snowflake conf.yaml: %s', (', '.join(self.errors)))
        if not self.metric_queries:
            raise ConfigurationError('No valid metric_groups configured, please list at least one.')

        self._query_manager = QueryManager(self, self.execute_query_raw, queries=self.metric_queries, tags=self._tags)
        self.check_initializations.append(self._query_manager.compile_queries)
Example #4
    def __init__(self, name, init_config, instances):
        super(TeradataCheck, self).__init__(name, init_config, instances)

        self._connect_params = None
        self._connection = None
        self._tags = []
        self._query_errors = 0
        self._tables_filter = None

        manager_queries = deepcopy(DEFAULT_QUERIES)
        if is_affirmative(self.instance.get('collect_res_usage_metrics',
                                            False)):
            manager_queries.extend(COLLECT_RES_USAGE)
        if is_affirmative(
                self.instance.get('collect_table_disk_metrics', False)):
            manager_queries.extend(COLLECT_ALL_SPACE)

        self._query_manager = QueryManager(
            self,
            self._execute_query_raw,
            queries=manager_queries,
            tags=self._tags,
            error_handler=self._executor_error_handler,
        )
        self.check_initializations.append(self.initialize_config)
        self.check_initializations.append(self._query_manager.compile_queries)
Example #5
    def __init__(self, name, init_config, instances):
        super(SQLServer, self).__init__(name, init_config, instances)

        self.connection = None
        self.failed_connections = {}
        self.instance_metrics = []
        self.instance_per_type_metrics = defaultdict(list)
        self.do_check = True

        self.autodiscovery = is_affirmative(self.instance.get('database_autodiscovery'))
        self.autodiscovery_include = self.instance.get('autodiscovery_include', ['.*'])
        self.autodiscovery_exclude = self.instance.get('autodiscovery_exclude', [])
        self._compile_patterns()
        self.autodiscovery_interval = self.instance.get('autodiscovery_interval', DEFAULT_AUTODISCOVERY_INTERVAL)
        self.databases = set()
        self.ad_last_check = 0

        self.proc = self.instance.get('stored_procedure')
        self.proc_type_mapping = {'gauge': self.gauge, 'rate': self.rate, 'histogram': self.histogram}
        self.custom_metrics = init_config.get('custom_metrics', [])

        # use QueryManager to process custom queries
        self._query_manager = QueryManager(self, self.execute_query_raw, queries=[], tags=self.instance.get("tags", []))
        self.check_initializations.append(self.config_checks)
        self.check_initializations.append(self._query_manager.compile_queries)
        self.check_initializations.append(self.initialize_connection)
Example #6
    def __init__(self, name, init_config, instances):
        super(Oracle, self).__init__(name, init_config, instances)
        (
            self._server,
            self._user,
            self._password,
            self._service,
            self._jdbc_driver,
            self._tags,
            only_custom_queries,
        ) = self._get_config(self.instance)

        self.check_initializations.append(self.validate_config)

        self._connection = None

        manager_queries = []
        if not only_custom_queries:
            manager_queries.extend([
                queries.ProcessMetrics, queries.SystemMetrics,
                queries.TableSpaceMetrics
            ])

        self._fix_custom_queries()

        self._query_manager = QueryManager(
            self,
            self.execute_query_raw,
            queries=manager_queries,
            tags=self._tags,
        )
        self.check_initializations.append(self._query_manager.compile_queries)
Example #7
    def __init__(self, name, init_config, instances):
        # type: (str, dict, list) -> None
        super(VoltDBCheck, self).__init__(name, init_config, instances)

        self._config = Config(cast(Instance, self.instance),
                              debug=self.log.debug)
        self.register_secret(self._config.password)
        self._client = Client(
            url=self._config.url,
            http_get=self.http.get,
            username=self._config.username,
            password=self._config.password,
            password_hashed=self._config.password_hashed,
        )

        self._query_manager = QueryManager(
            self,
            self._execute_query_raw,
            queries=[
                queries.CPUMetrics,
                queries.MemoryMetrics,
                queries.SnapshotStatusMetrics,
                queries.CommandLogMetrics,
                queries.ProcedureMetrics,
                queries.LatencyMetrics,
                queries.GCMetrics,
                queries.IOStatsMetrics,
                queries.TableMetrics,
                queries.IndexMetrics,
            ],
            tags=self._config.tags,
        )
        self.check_initializations.append(self._query_manager.compile_queries)
Example #8
    def __init__(self, name, init_config, instances):
        super(SQLServer, self).__init__(name, init_config, instances)

        self.failed_connections = {}
        self.instance_metrics = []
        self.instance_per_type_metrics = defaultdict(list)
        self.do_check = True

        self.proc = self.instance.get('stored_procedure')
        self.proc_type_mapping = {
            'gauge': self.gauge,
            'rate': self.rate,
            'histogram': self.histogram
        }

        # use QueryManager to process custom queries
        self._query_manager = QueryManager(self,
                                           self.execute_query_raw,
                                           queries=[],
                                           tags=self.instance.get("tags", []))
        self.check_initializations.append(self._query_manager.compile_queries)

        self.connection = Connection(init_config, self.instance,
                                     self.handle_service_check, self.log)

        # Pre-process the list of metrics to collect
        self.custom_metrics = init_config.get('custom_metrics', [])
        try:
            # check to see if the database exists before we try any connections to it
            db_exists, context = self.connection.check_database()

            if db_exists:
                if self.instance.get('stored_procedure') is None:
                    with self.connection.open_managed_default_connection():
                        self._make_metric_list_to_collect(self.custom_metrics)
            else:
                # How much do we care that the DB doesn't exist?
                ignore = is_affirmative(
                    self.instance.get("ignore_missing_database", False))
                if ignore is not None and ignore:
                    # not much : we expect it. leave checks disabled
                    self.do_check = False
                    self.log.warning(
                        "Database %s does not exist. Disabling checks for this instance.",
                        context)
                else:
                    # yes we do. Keep trying
                    msg = "Database {} does not exist. Please resolve invalid database and restart agent".format(
                        context)
                    raise ConfigurationError(msg)

        # Historically, the check does not raise exceptions on init failures
        # We continue that here for backwards compatibility, aside from the new Config exception
        except SQLConnectionError as e:
            self.log.exception("Error connecting to database: %s", e)
        except ConfigurationError:
            raise
        except Exception as e:
            self.log.exception("Initialization exception %s", e)
Example #9
    def set_up_query_manager(self):
        system_info = self.fetch_system_info()
        if system_info:
            query_list = []
            QUERY_MAP = queries.query_map(self.config)
            is_7_3_or_higher = system_info.os_version > 7 or (
                system_info.os_version == 7 and system_info.os_release >= 3)

            for query in self.config.queries:
                if query.name == "disk_usage":
                    # disk_usage works differently on 7.2 vs 7.3
                    if is_7_3_or_higher:
                        query_list.append(
                            queries.get_base_disk_usage_73(
                                self.config.query_timeout))
                        query_list.append(
                            queries.get_disk_usage(self.config.query_timeout))
                    else:
                        query_list.append(
                            queries.get_base_disk_usage_72(
                                self.config.query_timeout))
                elif query.name == "subsystem":
                    # subsystem is only supported on 7.3
                    if is_7_3_or_higher:
                        query_list.append(
                            queries.get_subsystem_info(
                                self.config.query_timeout))
                    else:
                        # For backwards compatibility, we don't fail
                        self.log.info(
                            "Skipping 'subsystem' query since target system is older than 7.3"
                        )
                elif query.name not in QUERY_MAP:
                    raise ConfigurationError(
                        "Unknown or unsupported query name: {}".format(
                            query.name))
                else:
                    query_list.append(QUERY_MAP[query.name])

            hostname = system_info.hostname
            # Override hostname with configuration
            if self.config.hostname:
                hostname = self.config.hostname

            self._query_manager = QueryManager(
                self,
                self.execute_query,
                tags=self.config.tags,
                queries=query_list,
                hostname=hostname,
                error_handler=self.handle_query_error,
            )
            self._query_manager.compile_queries()
Example #10
    def __init__(self, name, init_config, instances):
        super(MySql, self).__init__(name, init_config, instances)
        self.qcache_stats = {}
        self.version = None
        self.config = MySQLConfig(self.instance)

        # Create a new connection on every check run
        self._conn = None

        self._query_manager = QueryManager(self, self.execute_query_raw, queries=[], tags=self.config.tags)
        self._statement_metrics = MySQLStatementMetrics(self.config)
        self.check_initializations.append(self._query_manager.compile_queries)
        self.innodb_stats = InnoDBMetrics()
        self.check_initializations.append(self.config.configuration_checks)
Example #11
def create_query_manager(*args, **kwargs):
    executor = kwargs.pop('executor', None)
    if executor is None:
        executor = mock_executor()

    check = kwargs.pop('check', None) or AgentCheck('test', {}, [{}])
    return QueryManager(check, executor, [Query(arg) for arg in args],
                        **kwargs)
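
As a hypothetical illustration of how this helper might be used in a test (the query text, column name, and tag below are made up; the helper wraps the dict in a `Query`, and the mocked executor is assumed to return rows matching the column layout):

manager = create_query_manager(
    {
        'name': 'test query',
        'query': 'SELECT foo FROM bar',
        'columns': [{'name': 'test.foo', 'type': 'gauge'}],
    },
    tags=['env:test'],
)
manager.compile_queries()
manager.execute()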
Example #12
    def __init__(self, *args, **kwargs):
        super(SnowflakeCheck, self).__init__(*args, **kwargs)
        self._config = Config(self.instance)
        self._conn = None

        self.proxy_host = self.init_config.get('proxy_host', None)
        self.proxy_port = self.init_config.get('proxy_port', None)
        self.proxy_user = self.init_config.get('proxy_user', None)
        self.proxy_password = self.init_config.get('proxy_password', None)

        # Add default tags like account to all metrics
        self._tags = self._config.tags + ['account:{}'.format(self._config.account)]

        if self._config.password:
            self.register_secret(self._config.password)

        if self._config.private_key_password:
            self.register_secret(self._config.private_key_password)

        if self._config.role == 'ACCOUNTADMIN':
            self.log.info(
                'Snowflake `role` is set as `ACCOUNTADMIN` which should be used cautiously, '
                'refer to docs about custom roles.'
            )

        self.metric_queries = []
        self.errors = []
        for mgroup in self._config.metric_groups:
            try:
                if not self._config.aggregate_last_24_hours:
                    for query in range(len(METRIC_GROUPS[mgroup])):
                        METRIC_GROUPS[mgroup][query]['query'] = METRIC_GROUPS[mgroup][query]['query'].replace(
                            'DATEADD(hour, -24, current_timestamp())', 'date_trunc(day, current_date)'
                        )
                self.metric_queries.extend(METRIC_GROUPS[mgroup])
            except KeyError:
                self.errors.append(mgroup)

        if self.errors:
            self.log.warning('Invalid metric_groups found in snowflake conf.yaml: %s', (', '.join(self.errors)))
        if not self.metric_queries and not self._config.custom_queries_defined:
            raise ConfigurationError('No valid metric_groups or custom query configured, please list at least one.')

        self._query_manager = QueryManager(self, self.execute_query_raw, queries=self.metric_queries, tags=self._tags)
        self.check_initializations.append(self._query_manager.compile_queries)
Example #13
def create_query_manager(*args, **kwargs):
    executor = kwargs.pop('executor', None)
    if executor is None:
        executor = mock_executor()

    check = kwargs.pop('check', None) or AgentCheck('test', {}, [{}])
    check.check_id = 'test:instance'

    return QueryManager(check, executor, args, **kwargs)
Example #14
    def __init__(self, name, init_config, instances):
        super(ProxysqlCheck, self).__init__(name, init_config, instances)
        self.host = self.instance.get("host", "")
        self.port = int(self.instance.get("port", 0))
        self.user = self.instance.get("username", "")
        self.password = str(self.instance.get("password", ""))

        if not all((self.host, self.port, self.user, self.password)):
            raise ConfigurationError(
                "ProxySQL host, port, username and password are needed")

        self.database_name = self.instance.get("database_name", "stats")
        self.tls_verify = self.instance.get("tls_verify", False)
        self.validate_hostname = self.instance.get("validate_hostname", True)
        self.tls_ca_cert = self.instance.get("tls_ca_cert")
        self.connect_timeout = self.instance.get("connect_timeout", 10)
        self.read_timeout = self.instance.get("read_timeout")

        self.tags = self.instance.get("tags", [])
        self.tags.append("proxysql_server:{}".format(self.host))
        self.tags.append("proxysql_port:{}".format(self.port))

        manager_queries = [STATS_MYSQL_GLOBAL]
        if self.is_metadata_collection_enabled():
            # Add the query to collect the ProxySQL version
            manager_queries.append(VERSION_METADATA)

        additional_metrics = self.instance.get("additional_metrics", [])
        for additional_group in additional_metrics:
            if additional_group not in ADDITIONAL_METRICS_MAPPING:
                raise ConfigurationError(
                    "There is no additional metric group called '{}' for the ProxySQL integration, it should be one "
                    "of ({})".format(
                        additional_group,
                        ", ".join(ADDITIONAL_METRICS_MAPPING),
                    ))
            manager_queries.append(
                ADDITIONAL_METRICS_MAPPING[additional_group])
        self._connection = None
        self._query_manager = QueryManager(self,
                                           self.execute_query_raw,
                                           queries=manager_queries,
                                           tags=self.tags)
        self.check_initializations.append(self._query_manager.compile_queries)
Example #15
    def __init__(self, *args, **kwargs):
        super(SnowflakeCheck, self).__init__(*args, **kwargs)
        self._config = Config(self.instance)
        self._conn = None

        self.proxy_host = self.init_config.get('proxy_host', None)
        self.proxy_port = self.init_config.get('proxy_port', None)
        self.proxy_user = self.init_config.get('proxy_user', None)
        self.proxy_password = self.init_config.get('proxy_password', None)

        # Add default tags like account to all metrics
        self._tags = self._config.tags + [
            'account:{}'.format(self._config.account)
        ]

        if self._config.password:
            self.register_secret(self._config.password)

        if self._config.role == 'ACCOUNTADMIN':
            self.log.info(
                'Snowflake `role` is set as `ACCOUNTADMIN` which should be used cautiously, '
                'refer to docs about custom roles.')

        self.metric_queries = []
        self.errors = []
        for mgroup in self._config.metric_groups:
            try:
                self.metric_queries.extend(METRIC_GROUPS[mgroup])
            except KeyError:
                self.errors.append(mgroup)

        if self.errors:
            self.log.warning(
                'Invalid metric_groups found in snowflake conf.yaml: %s',
                (', '.join(self.errors)))
        if not self.metric_queries:
            raise ConfigurationError(
                'No valid metric_groups configured, please list at least one.')

        self._query_manager = QueryManager(self,
                                           self.execute_query_raw,
                                           queries=self.metric_queries,
                                           tags=self._tags)
        self.check_initializations.append(self._query_manager.compile_queries)
Example #16
    def __init__(self, name, init_config, instances):
        # type: (AnyStr, Dict[AnyStr, Any], List[Dict[AnyStr, Any]]) -> None
        super(SinglestoreCheck, self).__init__(name, init_config, instances)
        self.config = SingleStoreConfig(self.instance)
        self._connection = cast(pymysql.Connection, None)

        manager_queries = []
        manager_queries.extend(DEFAULT_QUERIES)
        if self.config.collect_system_metrics:
            manager_queries.extend(ADDITIONAL_SYSTEM_QUERIES)
        self._query_manager = QueryManager(self,
                                           self.execute_query_raw,
                                           queries=manager_queries,
                                           tags=self.config.tags)
        self.check_initializations.append(self._query_manager.compile_queries)
        self._service_check_tags = [
            'singlestore_endpoint:{}:{}'.format(self.config.host,
                                                self.config.port)
        ] + self.config.tags
Example #17
    def __init__(self, name, init_config, instances):
        super(Oracle, self).__init__(name, init_config, instances)
        self._server = self.instance.get('server')
        self._user = self.instance.get('username') or self.instance.get('user')
        self._password = self.instance.get('password')
        self._service = self.instance.get('service_name')
        self._protocol = self.instance.get("protocol", PROTOCOL_TCP)
        self._jdbc_driver = self.instance.get('jdbc_driver_path')
        self._jdbc_truststore_path = self.instance.get('jdbc_truststore_path')
        self._jdbc_truststore_type = self.instance.get('jdbc_truststore_type')
        self._jdbc_truststore_password = self.instance.get(
            'jdbc_truststore_password', '')
        self._tags = self.instance.get('tags') or []
        self._service_check_tags = ['server:{}'.format(self._server)]
        self._service_check_tags.extend(self._tags)

        self._cached_connection = None

        manager_queries = []
        if not self.instance.get('only_custom_queries', False):
            manager_queries.extend([
                queries.ProcessMetrics, queries.SystemMetrics,
                queries.TableSpaceMetrics
            ])

        self._fix_custom_queries()

        self._query_manager = QueryManager(
            self,
            self.execute_query_raw,
            queries=manager_queries,
            error_handler=self.handle_query_error,
            tags=self._tags,
        )

        # Runtime validations are only py3, so this is for manually validating config on py2
        if PY2:
            self.check_initializations.append(self.validate_config)
        self.check_initializations.append(self._query_manager.compile_queries)

        self._query_errors = 0
        self._connection_errors = 0
Example #18
    def __init__(self, name, init_config, instances):
        super(ClickhouseCheck, self).__init__(name, init_config, instances)

        self._server = self.instance.get('server', '')
        self._port = self.instance.get('port')
        self._db = self.instance.get('db', 'default')
        self._user = self.instance.get('user', 'default')
        self._password = self.instance.get('password', '')
        self._connect_timeout = float(self.instance.get('connect_timeout', 10))
        self._read_timeout = float(self.instance.get('read_timeout', 10))
        self._compression = self.instance.get('compression', False)
        self._tls_verify = is_affirmative(
            self.instance.get('tls_verify', False))
        self._tags = self.instance.get('tags', [])

        # Add global tags
        self._tags.append('server:{}'.format(self._server))
        self._tags.append('port:{}'.format(self._port))
        self._tags.append('db:{}'.format(self._db))

        self._error_sanitizer = ErrorSanitizer(self._password)
        self.check_initializations.append(self.validate_config)

        # We'll connect on the first check run
        self._client = None
        self.check_initializations.append(self.create_connection)

        self._query_manager = QueryManager(
            self,
            self.execute_query_raw,
            queries=[
                queries.SystemMetrics,
                queries.SystemEvents,
                queries.SystemAsynchronousMetrics,
                queries.SystemParts,
                queries.SystemReplicas,
                queries.SystemDictionaries,
            ],
            tags=self._tags,
            error_handler=self._error_sanitizer.clean,
        )
        self.check_initializations.append(self._query_manager.compile_queries)
Example #19
    def __init__(self, name, init_config, instances):
        # type: (str, dict, list) -> None
        super(VoltDBCheck, self).__init__(name, init_config, instances)

        self._config = Config(cast(Instance, self.instance), debug=self.log.debug)
        self.register_secret(self._config.password)
        self._client = Client(
            url=self._config.url,
            http_get=self.http.get,
            username=self._config.username,
            password=self._config.password,
            password_hashed=self._config.password_hashed,
        )

        self._query_manager = QueryManager(
            self,
            self._execute_query_raw,
            queries=self._config.queries,
            tags=self._config.tags,
        )
        self.check_initializations.append(self._query_manager.compile_queries)
Example #20
    def __init__(self, name, init_config, instances):
        # type: (str, dict, list) -> None
        super(VoltDBCheck, self).__init__(name, init_config, instances)

        self._config = Config(cast(Instance, self.instance),
                              debug=self.log.debug)
        self.register_secret(self._config.password)
        self._client = Client(
            url=self._config.url,
            http_get=self.http.get,
            username=self._config.username,
            password=self._config.password,
            password_hashed=self._config.password_hashed,
        )

        manager_queries = [
            queries.CPUMetrics,
            queries.MemoryMetrics,
            queries.SnapshotStatusMetrics,
            queries.CommandLogMetrics,
            queries.ProcedureMetrics,
            queries.LatencyMetrics,
            queries.GCMetrics,
            queries.IOStatsMetrics,
            queries.TableMetrics,
            queries.IndexMetrics,
        ]

        if BASE_PARSED_VERSION < pkg_resources.parse_version('15.0.0'):
            # On Agent < 7.24.0 we must pass `Query` objects instead of dicts.
            manager_queries = [Query(query)
                               for query in manager_queries]  # type: ignore

        self._query_manager = QueryManager(
            self,
            self._execute_query_raw,
            queries=manager_queries,
            tags=self._config.tags,
        )
        self.check_initializations.append(self._query_manager.compile_queries)
Example #21
    def __init__(self, name, init_config, instances):
        super(MySql, self).__init__(name, init_config, instances)
        self.qcache_stats = {}
        self.version = None
        self.is_mariadb = None
        self._resolved_hostname = None
        self._agent_hostname = None
        self._is_aurora = None
        self._config = MySQLConfig(self.instance)

        # Create a new connection on every check run
        self._conn = None

        self._query_manager = QueryManager(self, self.execute_query_raw, queries=[])
        self.check_initializations.append(self._query_manager.compile_queries)
        self.innodb_stats = InnoDBMetrics()
        self.check_initializations.append(self._config.configuration_checks)
        self.performance_schema_enabled = None
        self._warnings_by_code = {}
        self._statement_metrics = MySQLStatementMetrics(self, self._config, self._get_connection_args())
        self._statement_samples = MySQLStatementSamples(self, self._config, self._get_connection_args())
        self._query_activity = MySQLActivity(self, self._config, self._get_connection_args())
Example #22
    def test_queries_are_copied(self):
        class MyCheck(AgentCheck):
            pass

        check1 = MyCheck('test', {}, [{}])
        check2 = MyCheck('test', {}, [{}])
        dummy_query = {
            'name': 'test query',
            'query': 'foo',
            'columns': [
                {'name': 'test.foo', 'type': 'gauge', 'tags': ['override:ok']},
                {'name': 'test.baz', 'type': 'gauge', 'raw': True},
            ],
            'tags': ['test:bar'],
        }
        query_manager1 = QueryManager(check1, mock_executor(), [dummy_query])
        query_manager2 = QueryManager(check2, mock_executor(), [dummy_query])
        query_manager1.compile_queries()
        query_manager2.compile_queries()
        assert not id(query_manager1.queries[0]) == id(
            query_manager2.queries[0]
        ), "QueryManager does not copy the queries"
Example #23
def test_custom_metrics_multiple_results(aggregator, check):
    con = mock.MagicMock()
    cursor = mock.MagicMock()
    data = [["tag_value1", "1"], ["tag_value2", "2"]]
    cursor.fetchall.side_effect = lambda: iter(data)
    con.cursor.return_value = cursor

    custom_queries = [{
        "metric_prefix": "oracle.test1",
        "query": "mocked",
        "columns": [
            {"name": "tag_name", "type": "tag"},
            {"name": "metric", "type": "gauge"},
        ],
        "tags": ["query_tags1"],
    }]

    check.instance['custom_queries'] = custom_queries
    check._fix_custom_queries()
    check._connection = con
    query_manager = QueryManager(check,
                                 check.execute_query_raw,
                                 tags=['custom_tag'])
    query_manager.compile_queries()

    query_manager.execute()

    aggregator.assert_metric(
        "oracle.test1.metric",
        value=1,
        count=1,
        tags=["tag_name:tag_value1", "query_tags1", "custom_tag"])
    aggregator.assert_metric(
        "oracle.test1.metric",
        value=2,
        count=1,
        tags=["tag_name:tag_value2", "query_tags1", "custom_tag"])
Example #24
class Oracle(AgentCheck):
    __NAMESPACE__ = 'oracle'

    ORACLE_DRIVER_CLASS = "oracle.jdbc.OracleDriver"
    JDBC_CONNECT_STRING = "jdbc:oracle:thin:@//{}/{}"
    CX_CONNECT_STRING = "{}/{}@//{}/{}"

    SERVICE_CHECK_NAME = 'can_connect'

    def __init__(self, name, init_config, instances):
        super(Oracle, self).__init__(name, init_config, instances)
        (
            self._server,
            self._user,
            self._password,
            self._service,
            self._jdbc_driver,
            self._tags,
            only_custom_queries,
        ) = self._get_config(self.instance)

        self.check_initializations.append(self.validate_config)

        self._connection = None

        manager_queries = []
        if not only_custom_queries:
            manager_queries.extend([
                queries.ProcessMetrics, queries.SystemMetrics,
                queries.TableSpaceMetrics
            ])

        self._fix_custom_queries()

        self._query_manager = QueryManager(
            self,
            self.execute_query_raw,
            queries=manager_queries,
            tags=self._tags,
        )
        self.check_initializations.append(self._query_manager.compile_queries)

    def _fix_custom_queries(self):
        """
        For backward compatibility reasons, if a custom query specifies a
        `metric_prefix`, change the submission name to contain it.
        """
        custom_queries = self.instance.get('custom_queries', [])
        global_custom_queries = self.init_config.get('global_custom_queries',
                                                     [])
        for query in itertools.chain(custom_queries, global_custom_queries):
            prefix = query.get('metric_prefix')
            if prefix and prefix != self.__NAMESPACE__:
                if prefix.startswith(self.__NAMESPACE__ + '.'):
                    prefix = prefix[len(self.__NAMESPACE__) + 1:]
                for column in query.get('columns', []):
                    if column.get('type') != 'tag':
                        column['name'] = '{}.{}'.format(prefix, column['name'])

    def validate_config(self):
        if not self._server or not self._user:
            raise ConfigurationError("Oracle host and user are needed")

    def execute_query_raw(self, query):
        with closing(self._connection.cursor()) as cursor:
            cursor.execute(query)
            # JDBC doesn't support iter protocol
            return cursor.fetchall()

    def check(self, _):
        self.create_connection()
        with closing(self._connection):
            self._query_manager.execute()
            self._connection = None

    def _get_config(self, instance):
        server = instance.get('server')
        user = instance.get('user')
        password = instance.get('password')
        service = instance.get('service_name')
        jdbc_driver = instance.get('jdbc_driver_path')
        tags = instance.get('tags') or []
        only_custom_queries = instance.get('only_custom_queries', False)

        return server, user, password, service, jdbc_driver, tags, only_custom_queries

    def create_connection(self):
        service_check_tags = ['server:%s' % self._server]
        service_check_tags.extend(self._tags)

        try:
            # Check if the instantclient is available
            cx_Oracle.clientversion()
        except cx_Oracle.DatabaseError as e:
            # Fallback to JDBC
            use_oracle_client = False
            self.log.debug(
                'Oracle instant client unavailable, falling back to JDBC: %s',
                e)
            connect_string = self.JDBC_CONNECT_STRING.format(
                self._server, self._service)
        else:
            use_oracle_client = True
            self.log.debug('Running cx_Oracle version %s', cx_Oracle.version)
            connect_string = self.CX_CONNECT_STRING.format(
                self._user, self._password, self._server, self._service)

        try:
            if use_oracle_client:
                connection = cx_Oracle.connect(connect_string)
            elif JDBC_IMPORT_ERROR:
                self.log.error(
                    "Oracle client is unavailable and the integration is unable to import JDBC libraries. You may not "
                    "have the Microsoft Visual C++ Runtime 2015 installed on your system. Please double check your "
                    "installation and refer to the Datadog documentation for more information."
                )
                raise JDBC_IMPORT_ERROR
            else:
                try:
                    if jpype.isJVMStarted() and not jpype.isThreadAttachedToJVM():
                        jpype.attachThreadToJVM()
                        jpype.java.lang.Thread.currentThread().setContextClassLoader(
                            jpype.java.lang.ClassLoader.getSystemClassLoader()
                        )
                    connection = jdb.connect(self.ORACLE_DRIVER_CLASS,
                                             connect_string,
                                             [self._user, self._password],
                                             self._jdbc_driver)
                except Exception as e:
                    if "Class {} not found".format(
                            self.ORACLE_DRIVER_CLASS) in str(e):
                        msg = """Cannot run the Oracle check until either the Oracle instant client or the JDBC Driver
                        is available.
                        For the Oracle instant client, see:
                        http://www.oracle.com/technetwork/database/features/instant-client/index.html
                        You will also need to ensure the `LD_LIBRARY_PATH` is also updated so the libs are reachable.

                        For the JDBC Driver, see:
                        http://www.oracle.com/technetwork/database/application-development/jdbc/downloads/index.html
                        You will also need to ensure the jar is either listed in your $CLASSPATH or in the yaml
                        configuration file of the check.
                        """
                        self.log.error(msg)
                    raise

            self.log.debug("Connected to Oracle DB")
            self.service_check(self.SERVICE_CHECK_NAME,
                               AgentCheck.OK,
                               tags=service_check_tags)
        except Exception as e:
            self.service_check(self.SERVICE_CHECK_NAME,
                               AgentCheck.CRITICAL,
                               tags=service_check_tags)
            self.log.error(e)
            raise
        self._connection = connection
Example #25
class SnowflakeCheck(AgentCheck):
    """
    Collect Snowflake account usage metrics
    """

    __NAMESPACE__ = 'snowflake'

    SERVICE_CHECK_CONNECT = 'snowflake.can_connect'

    def __init__(self, *args, **kwargs):
        super(SnowflakeCheck, self).__init__(*args, **kwargs)
        self._config = Config(self.instance)
        self._conn = None

        self.proxy_host = self.init_config.get('proxy_host', None)
        self.proxy_port = self.init_config.get('proxy_port', None)
        self.proxy_user = self.init_config.get('proxy_user', None)
        self.proxy_password = self.init_config.get('proxy_password', None)

        # Add default tags like account to all metrics
        self._tags = self._config.tags + [
            'account:{}'.format(self._config.account)
        ]

        if self._config.password:
            self.register_secret(self._config.password)

        if self._config.role == 'ACCOUNTADMIN':
            self.log.info(
                'Snowflake `role` is set as `ACCOUNTADMIN` which should be used cautiously, '
                'refer to docs about custom roles.')

        self.metric_queries = []
        self.errors = []
        for mgroup in self._config.metric_groups:
            try:
                self.metric_queries.extend(METRIC_GROUPS[mgroup])
            except KeyError:
                self.errors.append(mgroup)

        if self.errors:
            self.log.warning(
                'Invalid metric_groups found in snowflake conf.yaml: %s',
                (', '.join(self.errors)))
        if not self.metric_queries:
            raise ConfigurationError(
                'No valid metric_groups configured, please list at least one.')

        self._query_manager = QueryManager(self,
                                           self.execute_query_raw,
                                           queries=self.metric_queries,
                                           tags=self._tags)
        self.check_initializations.append(self._query_manager.compile_queries)

    def check(self, _):
        self.connect()

        if self._conn is not None:
            # Execute queries
            self._query_manager.execute()

            self._collect_version()

            self.log.debug("Closing connection to Snowflake...")
            self._conn.close()

    def execute_query_raw(self, query):
        """
        Executes query with timestamp from parts if comparing start_time field.
        """
        with closing(self._conn.cursor()) as cursor:
            cursor.execute(query)

            if cursor.rowcount is None or cursor.rowcount < 1:
                self.log.debug("Failed to fetch records from query: `%s`",
                               query)
                return []
            return cursor.fetchall()

    def connect(self):
        self.log.debug(
            "Establishing a new connection to Snowflake: account=%s, user=%s, database=%s, schema=%s, warehouse=%s, "
            "role=%s, timeout=%s, authenticator=%s, ocsp_response_cache_filename=%s, proxy_host=%s, proxy_port=%s",
            self._config.account,
            self._config.user,
            self._config.database,
            self._config.schema,
            self._config.warehouse,
            self._config.role,
            self._config.login_timeout,
            self._config.authenticator,
            self._config.ocsp_response_cache_filename,
            self.proxy_host,
            self.proxy_port,
        )

        try:
            conn = sf.connect(
                user=self._config.user,
                password=self._config.password,
                account=self._config.account,
                database=self._config.database,
                schema=self._config.schema,
                warehouse=self._config.warehouse,
                role=self._config.role,
                passcode_in_password=self._config.passcode_in_password,
                passcode=self._config.passcode,
                client_prefetch_threads=self._config.client_prefetch_threads,
                login_timeout=self._config.login_timeout,
                ocsp_response_cache_filename=self._config.ocsp_response_cache_filename,
                authenticator=self._config.authenticator,
                token=self._config.token,
                client_session_keep_alive=self._config.client_keep_alive,
                proxy_host=self.proxy_host,
                proxy_port=self.proxy_port,
                proxy_user=self.proxy_user,
                proxy_password=self.proxy_password,
            )
        except Exception as e:
            msg = "Unable to connect to Snowflake: {}".format(e)
            self.service_check(self.SERVICE_CHECK_CONNECT,
                               self.CRITICAL,
                               message=msg,
                               tags=self._tags)
            self.warning(msg)
        else:
            self.service_check(self.SERVICE_CHECK_CONNECT,
                               self.OK,
                               tags=self._tags)
            self._conn = conn

    @AgentCheck.metadata_entrypoint
    def _collect_version(self):
        try:
            raw_version = self.execute_query_raw("select current_version();")
            version = raw_version[0][0]
        except Exception as e:
            self.log.error("Error collecting version for Snowflake: %s", e)
        else:
            if version:
                self.set_metadata('version', version)
Example #26
class MySql(AgentCheck):
    SERVICE_CHECK_NAME = 'mysql.can_connect'
    SLAVE_SERVICE_CHECK_NAME = 'mysql.replication.slave_running'
    DEFAULT_MAX_CUSTOM_QUERIES = 20

    def __init__(self, name, init_config, instances):
        super(MySql, self).__init__(name, init_config, instances)
        self.qcache_stats = {}
        self.version = None
        self.config = MySQLConfig(self.instance)

        # Create a new connection on every check run
        self._conn = None

        self._query_manager = QueryManager(self,
                                           self.execute_query_raw,
                                           queries=[],
                                           tags=self.config.tags)
        self._statement_metrics = MySQLStatementMetrics(self.config)
        self.check_initializations.append(self._query_manager.compile_queries)
        self.innodb_stats = InnoDBMetrics()
        self.check_initializations.append(self.config.configuration_checks)

    def execute_query_raw(self, query):
        with closing(self._conn.cursor(pymysql.cursors.SSCursor)) as cursor:
            cursor.execute(query)
            for row in cursor.fetchall_unbuffered():
                yield row

    @AgentCheck.metadata_entrypoint
    def _send_metadata(self):
        self.set_metadata('version',
                          self.version.version + '+' + self.version.build)
        self.set_metadata('flavor', self.version.flavor)

    @classmethod
    def get_library_versions(cls):
        return {'pymysql': pymysql.__version__}

    def check(self, _):
        self._set_qcache_stats()
        with self._connect() as db:
            try:
                self._conn = db

                # version collection
                self.version = get_version(db)
                self._send_metadata()

                # Metric collection
                self._collect_metrics(db)
                self._collect_system_metrics(self.config.host, db,
                                             self.config.tags)
                if self.config.deep_database_monitoring:
                    self._collect_statement_metrics(db, self.config.tags)

                # keeping track of these:
                self._put_qcache_stats()

                # Custom queries
                self._query_manager.execute()

            except Exception as e:
                self.log.exception("error!")
                raise e
            finally:
                self._conn = None

    def _set_qcache_stats(self):
        host_key = self._get_host_key()
        qcache_st = self.qcache_stats.get(host_key, (None, None, None))

        self._qcache_hits = qcache_st[0]
        self._qcache_inserts = qcache_st[1]
        self._qcache_not_cached = qcache_st[2]

    def _put_qcache_stats(self):
        host_key = self._get_host_key()
        self.qcache_stats[host_key] = (self._qcache_hits, self._qcache_inserts,
                                       self._qcache_not_cached)

    def _get_host_key(self):
        if self.config.defaults_file:
            return self.config.defaults_file

        hostkey = self.config.host
        if self.config.mysql_sock:
            hostkey = "{0}:{1}".format(hostkey, self.config.mysql_sock)
        elif self.config.port:
            hostkey = "{0}:{1}".format(hostkey, self.config.port)

        return hostkey

    def _get_connection_args(self):
        ssl = dict(self.config.ssl) if self.config.ssl else None
        connection_args = {
            'ssl': ssl,
            'connect_timeout': self.config.connect_timeout,
        }
        if self.config.charset:
            connection_args['charset'] = self.config.charset

        if self.config.defaults_file != '':
            connection_args['read_default_file'] = self.config.defaults_file
            return connection_args

        connection_args.update({
            'user': self.config.user,
            'passwd': self.config.password
        })
        if self.config.mysql_sock != '':
            self.service_check_tags = [
                'server:{0}'.format(self.config.mysql_sock),
                'port:unix_socket',
            ] + self.config.tags
            connection_args.update({'unix_socket': self.config.mysql_sock})
        else:
            connection_args.update({'host': self.config.host})

        if self.config.port:
            connection_args.update({'port': self.config.port})
        return connection_args

    @contextmanager
    def _connect(self):
        service_check_tags = [
            'server:{0}'.format(
                (self.config.mysql_sock
                 if self.config.mysql_sock != '' else self.config.host)),
            'port:{}'.format(
                self.config.port if self.config.port else 'unix_socket'),
        ] + self.config.tags
        db = None
        try:
            connect_args = self._get_connection_args()
            db = pymysql.connect(**connect_args)
            self.log.debug("Connected to MySQL")
            self.service_check_tags = list(set(service_check_tags))
            self.service_check(self.SERVICE_CHECK_NAME,
                               AgentCheck.OK,
                               tags=service_check_tags)
            yield db
        except Exception:
            self.service_check(self.SERVICE_CHECK_NAME,
                               AgentCheck.CRITICAL,
                               tags=service_check_tags)
            raise
        finally:
            if db:
                db.close()

    def _collect_metrics(self, db):

        # Get aggregate of all VARS we want to collect
        metrics = STATUS_VARS

        # collect results from db
        results = self._get_stats_from_status(db)
        results.update(self._get_stats_from_variables(db))

        if not is_affirmative(
                self.config.options.get(
                    'disable_innodb_metrics',
                    False)) and self._is_innodb_engine_enabled(db):
            results.update(self.innodb_stats.get_stats_from_innodb_status(db))
            self.innodb_stats.process_innodb_stats(results,
                                                   self.config.options,
                                                   metrics)

        # Binary log statistics
        if self._get_variable_enabled(results, 'log_bin'):
            results['Binlog_space_usage_bytes'] = self._get_binary_log_stats(
                db)

        # Compute key cache utilization metric
        key_blocks_unused = collect_scalar('Key_blocks_unused', results)
        key_cache_block_size = collect_scalar('key_cache_block_size', results)
        key_buffer_size = collect_scalar('key_buffer_size', results)
        results['Key_buffer_size'] = key_buffer_size

        try:
            # can be null if the unit is missing in the user config (4 instead of 4G for eg.)
            if key_buffer_size != 0:
                key_cache_utilization = 1 - (
                    (key_blocks_unused * key_cache_block_size) /
                    key_buffer_size)
                results['Key_cache_utilization'] = key_cache_utilization

            results['Key_buffer_bytes_used'] = collect_scalar(
                'Key_blocks_used', results) * key_cache_block_size
            results['Key_buffer_bytes_unflushed'] = (
                collect_scalar('Key_blocks_not_flushed', results) *
                key_cache_block_size)
        except TypeError as e:
            self.log.error(
                "Not all Key metrics are available, unable to compute: %s", e)

        metrics.update(VARIABLES_VARS)
        metrics.update(INNODB_VARS)
        metrics.update(BINLOG_VARS)

        if is_affirmative(
                self.config.options.get('extra_status_metrics', False)):
            self.log.debug("Collecting Extra Status Metrics")
            metrics.update(OPTIONAL_STATUS_VARS)

            if self.version.version_compatible((5, 6, 6)):
                metrics.update(OPTIONAL_STATUS_VARS_5_6_6)

        if is_affirmative(self.config.options.get('galera_cluster', False)):
            # already in result-set after 'SHOW STATUS' just add vars to collect
            self.log.debug("Collecting Galera Metrics.")
            metrics.update(GALERA_VARS)

        performance_schema_enabled = self._get_variable_enabled(
            results, 'performance_schema')
        above_560 = self.version.version_compatible((5, 6, 0))
        if (is_affirmative(
                self.config.options.get('extra_performance_metrics', False))
                and above_560 and performance_schema_enabled):
            # report avg query response time per schema to Datadog
            results[
                'perf_digest_95th_percentile_avg_us'] = self._get_query_exec_time_95th_us(
                    db)
            results['query_run_time_avg'] = self._query_exec_time_per_schema(
                db)
            metrics.update(PERFORMANCE_VARS)

        if is_affirmative(self.config.options.get('schema_size_metrics',
                                                  False)):
            # report the size of each schema to Datadog
            results['information_schema_size'] = self._query_size_per_schema(
                db)
            metrics.update(SCHEMA_VARS)

        if is_affirmative(self.config.options.get('replication', False)):
            replication_metrics = self._collect_replication_metrics(
                db, results, above_560)
            metrics.update(replication_metrics)
            self._check_replication_status(results)

        # "synthetic" metrics
        metrics.update(SYNTHETIC_VARS)
        self._compute_synthetic_results(results)

        # remove uncomputed metrics
        for k in SYNTHETIC_VARS:
            if k not in results:
                metrics.pop(k, None)

        # add duped metrics - reporting some as both rate and gauge
        dupes = [
            ('Table_locks_waited', 'Table_locks_waited_rate'),
            ('Table_locks_immediate', 'Table_locks_immediate_rate'),
        ]
        for src, dst in dupes:
            if src in results:
                results[dst] = results[src]

        self._submit_metrics(metrics, results, self.config.tags)

        # Collect custom query metrics
        # Max of 20 queries allowed
        if isinstance(self.config.queries, list):
            for check in self.config.queries[:self.config.max_custom_queries]:
                total_tags = self.config.tags + check.get('tags', [])
                self._collect_dict(check['type'],
                                   {check['field']: check['metric']},
                                   check['query'],
                                   db,
                                   tags=total_tags)

            if len(self.config.queries) > self.config.max_custom_queries:
                self.warning(
                    "Maximum number (%s) of custom queries reached.  Skipping the rest.",
                    self.config.max_custom_queries)

    def _collect_replication_metrics(self, db, results, above_560):
        # Get replica stats
        is_mariadb = self.version.flavor == "MariaDB"
        replication_channel = self.config.options.get('replication_channel')
        results.update(
            self._get_replica_stats(db, is_mariadb, replication_channel))
        nonblocking = is_affirmative(
            self.config.options.get('replication_non_blocking_status', False))
        results.update(self._get_slave_status(db, above_560, nonblocking))
        return REPLICA_VARS

    def _check_replication_status(self, results):
        # get slave running from global status page
        slave_running_status = AgentCheck.UNKNOWN
        # Slave_IO_Running: Whether the I/O thread for reading the source's binary log is running.
        # You want this to be Yes unless you have not yet started replication or have explicitly stopped it.
        slave_io_running = collect_type('Slave_IO_Running', results, dict)
        # Slave_SQL_Running: Whether the SQL thread for executing events in the relay log is running.
        slave_sql_running = collect_type('Slave_SQL_Running', results, dict)
        if slave_io_running:
            slave_io_running = any(v.lower().strip() == 'yes'
                                   for v in itervalues(slave_io_running))
        if slave_sql_running:
            slave_sql_running = any(v.lower().strip() == 'yes'
                                    for v in itervalues(slave_sql_running))
        binlog_running = results.get('Binlog_enabled', False)
        # slaves will only be collected iff user has PROCESS privileges.
        slaves = collect_scalar('Slaves_connected', results)

        if not (slave_io_running is None and slave_sql_running is None):
            if not slave_io_running and not slave_sql_running:
                self.log.debug(
                    "Slave_IO_Running and Slave_SQL_Running are not ok")
                slave_running_status = AgentCheck.CRITICAL
            if not slave_io_running or not slave_sql_running:
                self.log.debug(
                    "Either Slave_IO_Running or Slave_SQL_Running are not ok")
                slave_running_status = AgentCheck.WARNING

        if slave_running_status == AgentCheck.UNKNOWN:
            if self._is_master(slaves, results):  # master
                if slaves > 0 and binlog_running:
                    self.log.debug(
                        "Host is master, there are replicas and binlog is running"
                    )
                    slave_running_status = AgentCheck.OK
                else:
                    slave_running_status = AgentCheck.WARNING
            else:  # replica (or standalone)
                if not (slave_io_running is None
                        and slave_sql_running is None):
                    if slave_io_running and slave_sql_running:
                        self.log.debug(
                            "Slave_IO_Running and Slave_SQL_Running are ok")
                        slave_running_status = AgentCheck.OK

        # deprecated in favor of service_check("mysql.replication.slave_running")
        self.gauge(self.SLAVE_SERVICE_CHECK_NAME,
                   1 if slave_running_status == AgentCheck.OK else 0,
                   tags=self.config.tags)
        self.service_check(self.SLAVE_SERVICE_CHECK_NAME,
                           slave_running_status,
                           tags=self.service_check_tags)

    def _collect_statement_metrics(self, db, tags):
        tags = self.service_check_tags + tags
        metrics = self._statement_metrics.collect_per_statement_metrics(db)
        for metric_name, metric_value, metric_tags in metrics:
            self.count(metric_name,
                       metric_value,
                       tags=list(set(tags + metric_tags)))

    def _is_master(self, slaves, results):
        # Master_Host is only collected on replicas
        master_host = collect_string('Master_Host', results)
        if slaves > 0 or not master_host:
            return True

        return False

    def _submit_metrics(self, variables, db_results, tags):
        for variable, metric in iteritems(variables):
            metric_name, metric_type = metric
            for tag, value in collect_all_scalars(variable, db_results):
                metric_tags = list(tags)
                if tag:
                    metric_tags.append(tag)
                if value is not None:
                    if metric_type == RATE:
                        self.rate(metric_name, value, tags=metric_tags)
                    elif metric_type == GAUGE:
                        self.gauge(metric_name, value, tags=metric_tags)
                    elif metric_type == COUNT:
                        self.count(metric_name, value, tags=metric_tags)
                    elif metric_type == MONOTONIC:
                        self.monotonic_count(metric_name,
                                             value,
                                             tags=metric_tags)

    def _collect_dict(self, metric_type, field_metric_map, query, db, tags):
        """
        Query status and get a dictionary back.
        Extract each field out of the dictionary
        and stuff it in the corresponding metric.

        query: show status...
        field_metric_map: {"Seconds_behind_master": "mysqlSecondsBehindMaster"}
        """
        try:
            with closing(db.cursor()) as cursor:
                cursor.execute(query)
                result = cursor.fetchone()
                if result is not None:
                    for field, metric in list(iteritems(field_metric_map)):
                        # Find the column name in the cursor description to identify the column index
                        # http://www.python.org/dev/peps/pep-0249/
                        # cursor.description is a tuple of (column_name, ..., ...)
                        try:
                            col_idx = [
                                d[0].lower() for d in cursor.description
                            ].index(field.lower())
                            self.log.debug("Collecting metric: %s", metric)
                            if result[col_idx] is not None:
                                self.log.debug("Collecting done, value %s",
                                               result[col_idx])
                                if metric_type == GAUGE:
                                    self.gauge(metric,
                                               float(result[col_idx]),
                                               tags=tags)
                                elif metric_type == RATE:
                                    self.rate(metric,
                                              float(result[col_idx]),
                                              tags=tags)
                                else:
                                    self.gauge(metric,
                                               float(result[col_idx]),
                                               tags=tags)
                            else:
                                self.log.debug(
                                    "Received value is None for index %d",
                                    col_idx)
                        except ValueError:
                            self.log.exception(
                                "Cannot find %s in the columns %s", field,
                                cursor.description)
        except Exception:
            self.warning("Error while running %s\n%s", query,
                         traceback.format_exc())
            self.log.exception("Error while running %s", query)

    def _collect_system_metrics(self, host, db, tags):
        pid = None
        # The server needs to run locally, accessed by TCP or socket
        if host in ["localhost", "127.0.0.1", "0.0.0.0"] or db.port == long(0):
            pid = self._get_server_pid(db)

        if pid:
            self.log.debug("System metrics for mysql w/ pid: %s", pid)
            # At last, get mysql cpu data out of psutil or procfs

            try:
                ucpu, scpu = None, None
                if PSUTIL_AVAILABLE:
                    proc = psutil.Process(pid)

                    ucpu = proc.cpu_times()[0]
                    scpu = proc.cpu_times()[1]

                if ucpu and scpu:
                    self.rate("mysql.performance.user_time", ucpu, tags=tags)
                    # should really be system_time
                    self.rate("mysql.performance.kernel_time", scpu, tags=tags)
                    self.rate("mysql.performance.cpu_time",
                              ucpu + scpu,
                              tags=tags)

            except Exception:
                self.warning(
                    "Error while reading mysql (pid: %s) procfs data\n%s", pid,
                    traceback.format_exc())

    def _get_pid_file_variable(self, db):
        """
        Get the `pid_file` variable
        """
        pid_file = None
        try:
            with closing(db.cursor()) as cursor:
                cursor.execute("SHOW VARIABLES LIKE 'pid_file'")
                pid_file = cursor.fetchone()[1]
        except Exception:
            self.warning("Error while fetching pid_file variable of MySQL.")

        return pid_file

    def _get_server_pid(self, db):
        pid = None

        # Try to get pid from pid file, it can fail for permission reason
        pid_file = self._get_pid_file_variable(db)
        if pid_file is not None:
            self.log.debug("pid file: %s", str(pid_file))
            try:
                with open(pid_file, 'rb') as f:
                    pid = int(f.readline())
            except IOError:
                self.log.debug("Cannot read mysql pid file %s", pid_file)

        # If pid has not been found, read it from ps
        if pid is None and PSUTIL_AVAILABLE:
            for proc in psutil.process_iter():
                try:
                    if proc.name() == PROC_NAME:
                        pid = proc.pid
                except (psutil.AccessDenied, psutil.ZombieProcess,
                        psutil.NoSuchProcess):
                    continue
                except Exception:
                    self.log.exception(
                        "Error while fetching mysql pid from psutil")

        return pid

    @classmethod
    def _get_stats_from_status(cls, db):
        with closing(db.cursor()) as cursor:
            cursor.execute("SHOW /*!50002 GLOBAL */ STATUS;")
            results = dict(cursor.fetchall())

            return results

    @classmethod
    def _get_stats_from_variables(cls, db):
        with closing(db.cursor()) as cursor:
            cursor.execute("SHOW GLOBAL VARIABLES;")
            results = dict(cursor.fetchall())

            return results

    def _get_binary_log_stats(self, db):
        try:
            with closing(db.cursor()) as cursor:
                cursor.execute("SHOW BINARY LOGS;")
                cursor_results = cursor.fetchall()
                master_logs = {
                    result[0]: result[1]
                    for result in cursor_results
                }

                binary_log_space = 0
                for value in itervalues(master_logs):
                    binary_log_space += value

                return binary_log_space
        except (pymysql.err.InternalError, pymysql.err.OperationalError) as e:
            self.warning(
                "Privileges error accessing the BINARY LOGS (must grant REPLICATION CLIENT): %s",
                e)
            return None

    def _is_innodb_engine_enabled(self, db):
        # Whether InnoDB engine is available or not can be found out either
        # from the output of SHOW ENGINES or from information_schema.ENGINES
        # table. The latter is chosen because it involves no string parsing.
        try:
            with closing(db.cursor()) as cursor:
                cursor.execute(SQL_INNODB_ENGINES)
                return cursor.rowcount > 0

        except (pymysql.err.InternalError, pymysql.err.OperationalError,
                pymysql.err.NotSupportedError) as e:
            self.warning(
                "Possibly innodb stats unavailable - error querying engines table: %s",
                e)
            return False

    def _get_replica_stats(self, db, is_mariadb, replication_channel):
        replica_results = defaultdict(dict)
        try:
            with closing(db.cursor(pymysql.cursors.DictCursor)) as cursor:
                if is_mariadb and replication_channel:
                    cursor.execute(
                        "SET @@default_master_connection = '{0}';".format(
                            replication_channel))
                    cursor.execute("SHOW SLAVE STATUS;")
                elif replication_channel:
                    cursor.execute(
                        "SHOW SLAVE STATUS FOR CHANNEL '{0}';".format(
                            replication_channel))
                else:
                    cursor.execute("SHOW SLAVE STATUS;")

                results = cursor.fetchall()
                self.log.debug("Getting replication status: %s", results)
                for slave_result in results:
                    # MySQL <5.7 does not have Channel_Name.
                    # For MySQL >=5.7 'Channel_Name' is set to an empty string by default
                    channel = replication_channel or slave_result.get(
                        'Channel_Name') or 'default'
                    for key, value in iteritems(slave_result):
                        if value is not None:
                            replica_results[key]['channel:{0}'.format(
                                channel)] = value
        except (pymysql.err.InternalError, pymysql.err.OperationalError) as e:
            errno, msg = e.args
            if errno == 1617 and msg == "There is no master connection '{0}'".format(
                    replication_channel):
                # MariaDB complains when you try to get slave status with a
                # connection name on the master; without a connection name it
                # returns an empty result set as expected.
                # MySQL behaves the same with or without a connection name.
                pass
            else:
                self.warning(
                    "Privileges error getting replication status (must grant REPLICATION CLIENT): %s",
                    e)

        try:
            with closing(db.cursor(pymysql.cursors.DictCursor)) as cursor:
                cursor.execute("SHOW MASTER STATUS;")
                binlog_results = cursor.fetchone()
                if binlog_results:
                    replica_results.update({'Binlog_enabled': True})
        except (pymysql.err.InternalError, pymysql.err.OperationalError) as e:
            self.warning(
                "Privileges error getting binlog information (must grant REPLICATION CLIENT): %s",
                e)

        return replica_results

    def _get_slave_status(self, db, above_560, nonblocking):
        """
        Retrieve the slaves' statuses using:
        1. The `performance_schema.threads` table. Non-blocking, requires version > 5.6.0
        2. The `information_schema.processlist` table. Blocking
        """
        try:
            with closing(db.cursor()) as cursor:
                if above_560 and nonblocking:
                    # Query `performance_schema.threads` instead of `
                    # information_schema.processlist` to avoid mutex impact on performance.
                    cursor.execute(SQL_WORKER_THREADS)
                else:
                    cursor.execute(SQL_PROCESS_LIST)
                slave_results = cursor.fetchall()
                slaves = 0
                for _ in slave_results:
                    slaves += 1

                return {'Slaves_connected': slaves}

        except (pymysql.err.InternalError, pymysql.err.OperationalError) as e:
            self.warning(
                "Privileges error accessing the process tables (must grant PROCESS): %s",
                e)
            return {}

    @classmethod
    def _are_values_numeric(cls, array):
        return all(v.isdigit() for v in array)

    def _get_variable_enabled(self, results, var):
        enabled = collect_string(var, results)
        return enabled and enabled.lower().strip() == 'on'

    def _get_query_exec_time_95th_us(self, db):
        # Fetches the 95th percentile query execution time and returns the value
        # in microseconds
        try:
            with closing(db.cursor()) as cursor:
                cursor.execute(SQL_95TH_PERCENTILE)

                if cursor.rowcount < 1:
                    self.warning(
                        "Failed to fetch records from the perf schema "
                        "'events_statements_summary_by_digest' table.")
                    return None

                row = cursor.fetchone()
                query_exec_time_95th_per = row[0]

                return query_exec_time_95th_per
        except (pymysql.err.InternalError, pymysql.err.OperationalError) as e:
            self.warning(
                "95th percentile performance metrics unavailable at this time: %s",
                e)
            return None

    def _query_exec_time_per_schema(self, db):
        # Fetches the avg query execution time per schema and returns the
        # value in microseconds
        try:
            with closing(db.cursor()) as cursor:
                cursor.execute(SQL_AVG_QUERY_RUN_TIME)

                if cursor.rowcount < 1:
                    self.warning(
                        "Failed to fetch records from the perf schema "
                        "'events_statements_summary_by_digest' table.")
                    return None

                schema_query_avg_run_time = {}
                for row in cursor.fetchall():
                    schema_name = str(row[0])
                    avg_us = long(row[1])

                    # set the tag as the dictionary key
                    schema_query_avg_run_time["schema:{0}".format(
                        schema_name)] = avg_us

                return schema_query_avg_run_time
        except (pymysql.err.InternalError, pymysql.err.OperationalError) as e:
            self.warning(
                "Avg exec time performance metrics unavailable at this time: %s",
                e)
            return None

    def _query_size_per_schema(self, db):
        # Fetches the size of each schema and returns
        # the values keyed by schema tag
        try:
            with closing(db.cursor()) as cursor:
                cursor.execute(SQL_QUERY_SCHEMA_SIZE)

                if cursor.rowcount < 1:
                    self.warning(
                        "Failed to fetch records from the information schema 'tables' table."
                    )
                    return None

                schema_size = {}
                for row in cursor.fetchall():
                    schema_name = str(row[0])
                    size = long(row[1])

                    # set the tag as the dictionary key
                    schema_size["schema:{0}".format(schema_name)] = size

                return schema_size
        except (pymysql.err.InternalError, pymysql.err.OperationalError) as e:
            self.warning(
                "Avg exec time performance metrics unavailable at this time: %s",
                e)

        return {}

    def _compute_synthetic_results(self, results):
        if ('Qcache_hits' in results and 'Qcache_inserts' in results
                and 'Qcache_not_cached' in results):
            if not int(results['Qcache_hits']):
                results['Qcache_utilization'] = 0
            else:
                results['Qcache_utilization'] = (
                    float(results['Qcache_hits']) /
                    (int(results['Qcache_inserts']) +
                     int(results['Qcache_not_cached']) +
                     int(results['Qcache_hits'])) * 100)

            if all(v is not None
                   for v in (self._qcache_hits, self._qcache_inserts,
                             self._qcache_not_cached)):
                if not (int(results['Qcache_hits']) - self._qcache_hits):
                    results['Qcache_instant_utilization'] = 0
                else:
                    top = float(results['Qcache_hits']) - self._qcache_hits
                    bottom = (
                        (int(results['Qcache_inserts']) - self._qcache_inserts)
                        + (int(results['Qcache_not_cached']) -
                           self._qcache_not_cached) +
                        (int(results['Qcache_hits']) - self._qcache_hits))
                    results['Qcache_instant_utilization'] = (top /
                                                             bottom) * 100

            # update all three, or none - for consistent samples.
            self._qcache_hits = int(results['Qcache_hits'])
            self._qcache_inserts = int(results['Qcache_inserts'])
            self._qcache_not_cached = int(results['Qcache_not_cached'])
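
For reference, each entry consumed by the custom-query loop in the MySQL check above carries `type`, `query`, `field`, `metric`, and optional `tags` keys before being handed to `_collect_dict`. A minimal sketch of one such entry, reusing the field/metric pair from the `_collect_dict` docstring; the query text and tag are illustrative, and it assumes the GAUGE/RATE constants compared against `check['type']` are the plain strings 'gauge' and 'rate':

# Hypothetical custom query entry shaped the way the loop above expects it.
# Only the key names come from the check code; the values are illustrative.
custom_query = {
    'type': 'gauge',  # picks the submission method inside _collect_dict
    'query': 'SHOW SLAVE STATUS',  # statement passed to cursor.execute()
    'field': 'Seconds_behind_master',  # column located via cursor.description
    'metric': 'mysqlSecondsBehindMaster',  # metric name submitted
    'tags': ['custom:true'],  # appended to the instance tags
}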
Example #27
0
class TeradataCheck(AgentCheck, ConfigMixin):
    __NAMESPACE__ = 'teradata'

    def __init__(self, name, init_config, instances):
        super(TeradataCheck, self).__init__(name, init_config, instances)

        self._connect_params = None
        self._connection = None
        self._tags = []
        self._query_errors = 0
        self._tables_filter = None

        manager_queries = deepcopy(DEFAULT_QUERIES)
        if is_affirmative(self.instance.get('collect_res_usage_metrics',
                                            False)):
            manager_queries.extend(COLLECT_RES_USAGE)
        if is_affirmative(
                self.instance.get('collect_table_disk_metrics', False)):
            manager_queries.extend(COLLECT_ALL_SPACE)

        self._query_manager = QueryManager(
            self,
            self._execute_query_raw,
            queries=manager_queries,
            tags=self._tags,
            error_handler=self._executor_error_handler,
        )
        self.check_initializations.append(self.initialize_config)
        self.check_initializations.append(self._query_manager.compile_queries)

    def check(self, _):
        # type: (Any) -> None
        self._query_errors = 0

        try:
            with self.connect() as conn:
                if conn:
                    self._connection = conn
                    self._query_manager.execute()
            self.submit_health_checks()
        except Exception as e:
            self.service_check(SERVICE_CHECK_CONNECT,
                               ServiceCheck.CRITICAL,
                               tags=self._tags)
            raise e

    def initialize_config(self):
        # type: (Any) -> None
        self._connect_params = json.dumps({
            'host': self.config.server,
            'account': self.config.account,
            'database': self.config.database,
            'dbs_port': str(self.config.port),
            'logmech': self.config.auth_mechanism,
            'logdata': self.config.auth_data,
            'user': self.config.username,
            'password': self.config.password,
            'https_port': str(self.config.https_port),
            'sslmode': self.config.ssl_mode,
            'sslprotocol': self.config.ssl_protocol,
        })

        global_tags = [
            'teradata_server:{}'.format(self.instance.get('server')),
            'teradata_port:{}'.format(self.instance.get('port', 1025)),
        ]
        self._tags = list(self.config.tags)
        self._tags.extend(global_tags)
        self._query_manager.tags = self._tags

        self._tables_filter = create_tables_filter(self.config.tables)

    def _execute_query_raw(self, query):
        # type: (AnyStr) -> Iterable[Sequence]
        with closing(self._connection.cursor()) as cursor:
            query = query.format(self.config.database)
            cursor.execute(query)
            if cursor.rowcount < 1:
                self._query_errors += 1
                self.log.warning('Failed to fetch records from query: `%s`.',
                                 query)
                return None
            for row in cursor.fetchall():
                query_name = re.search(r'(DBC.[^\s]+)', query).group(1)
                try:
                    yield self._queries_processor(row, query_name)
                except Exception as e:
                    self.log.debug(
                        'Unable to process row returned from query "%s", skipping row %s. %s',
                        query_name, row, e)
                    yield row

    def _executor_error_handler(self, error):
        # type: (AnyStr) -> AnyStr
        self._query_errors += 1
        return error

    @contextmanager
    def connect(self):
        # type: () -> Iterator[teradatasql.connection]
        conn = None
        if TERADATASQL_IMPORT_ERROR:
            self.log.error(
                'Teradata SQL Driver module is unavailable. Please double check your installation and refer to the '
                'Datadog documentation for more information. %s',
                TERADATASQL_IMPORT_ERROR,
            )
            raise TERADATASQL_IMPORT_ERROR
        self.log.info('Connecting to Teradata database %s on server %s.',
                      self.config.database, self.config.server)
        try:
            conn = teradatasql.connect(self._connect_params)
            self.log.info('Connected to Teradata.')
            yield conn
        except Exception as e:
            self.log.error('Unable to connect to Teradata. %s.', e)
            raise e
        finally:
            if conn:
                conn.close()

    def submit_health_checks(self):
        # type: () -> None
        connect_status = ServiceCheck.OK
        query_status = ServiceCheck.CRITICAL if self._query_errors else ServiceCheck.OK

        self.service_check(SERVICE_CHECK_QUERY, query_status, tags=self._tags)
        self.service_check(SERVICE_CHECK_CONNECT,
                           connect_status,
                           tags=self._tags)

    def _queries_processor(self, row, query_name):
        # type: (Sequence, AnyStr) -> Sequence
        """
        Validate timestamps, filter tables, and normalize empty tags.
        """
        unprocessed_row = row

        # Return database version immediately
        if query_name == 'DBC.DBCInfoV':
            submit_version(self, row)
            return unprocessed_row

        # Only Resource Usage rows include timestamps; they do not include tags.
        if query_name == 'DBC.ResSpmaView':
            processed_row = timestamp_validator(self, unprocessed_row)
            return processed_row

        # Only AllSpaceV rows include table tags
        if (query_name == 'DBC.AllSpaceV'
                and is_affirmative(self.config.collect_table_disk_metrics)
                and self._tables_filter):
            tables_filtered_row = filter_tables(self._tables_filter,
                                                unprocessed_row)
            if tables_filtered_row:
                processed_row = tags_normalizer(tables_filtered_row,
                                                query_name)
                return processed_row
            # Discard row if empty (table is filtered out)
            return tables_filtered_row
        processed_row = tags_normalizer(unprocessed_row, query_name)
        self.log.trace('Row processor returned: %s. \nFrom query: "%s"',
                       processed_row, query_name)
        return processed_row
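
For reference, `_execute_query_raw` above derives the query name it hands to `_queries_processor` with `re.search(r'(DBC.[^\s]+)', query).group(1)`. A standalone sketch of that extraction; the sample SQL text is hypothetical, only the regex and the `DBC.DBCInfoV` view name appear in the code above:

import re

# Hypothetical query text referencing the DBC.DBCInfoV view used above.
sample_query = "SELECT InfoKey, InfoData FROM DBC.DBCInfoV"

# Same extraction as in _execute_query_raw: grab the first DBC.* identifier.
query_name = re.search(r'(DBC.[^\s]+)', sample_query).group(1)
print(query_name)  # -> 'DBC.DBCInfoV'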
Example #28
0
class SQLServer(AgentCheck):
    __NAMESPACE__ = 'sqlserver'

    def __init__(self, name, init_config, instances):
        super(SQLServer, self).__init__(name, init_config, instances)

        self.connection = None
        self.failed_connections = {}
        self.instance_metrics = []
        self.instance_per_type_metrics = defaultdict(list)
        self.do_check = True

        self.autodiscovery = is_affirmative(self.instance.get('database_autodiscovery'))
        self.autodiscovery_include = self.instance.get('autodiscovery_include', ['.*'])
        self.autodiscovery_exclude = self.instance.get('autodiscovery_exclude', [])
        self._compile_patterns()
        self.autodiscovery_interval = self.instance.get('autodiscovery_interval', DEFAULT_AUTODISCOVERY_INTERVAL)
        self.databases = set()
        self.ad_last_check = 0

        self.proc = self.instance.get('stored_procedure')
        self.proc_type_mapping = {'gauge': self.gauge, 'rate': self.rate, 'histogram': self.histogram}
        self.custom_metrics = init_config.get('custom_metrics', [])

        # use QueryManager to process custom queries
        self._query_manager = QueryManager(self, self.execute_query_raw, queries=[], tags=self.instance.get("tags", []))
        self.check_initializations.append(self.config_checks)
        self.check_initializations.append(self._query_manager.compile_queries)
        self.check_initializations.append(self.initialize_connection)

    def config_checks(self):
        if self.autodiscovery and self.instance.get('database'):
            self.log.warning(
                'sqlserver `database_autodiscovery` and `database` options defined in same instance - '
                'autodiscovery will take precedence.'
            )
        if not self.autodiscovery and (self.autodiscovery_include or self.autodiscovery_exclude):
            self.log.warning(
                "Autodiscovery is disabled, autodiscovery_include and autodiscovery_exclude will be ignored"
            )

    def initialize_connection(self):
        self.connection = Connection(self.init_config, self.instance, self.handle_service_check)

        # Pre-process the list of metrics to collect
        try:
            # check to see if the database exists before we try any connections to it
            db_exists, context = self.connection.check_database()

            if db_exists:
                if self.instance.get('stored_procedure') is None:
                    with self.connection.open_managed_default_connection():
                        with self.connection.get_managed_cursor() as cursor:
                            self.autodiscover_databases(cursor)
                        self._make_metric_list_to_collect(self.custom_metrics)
            else:
                # How much do we care that the DB doesn't exist?
                ignore = is_affirmative(self.instance.get("ignore_missing_database", False))
                if ignore is not None and ignore:
                    # not much : we expect it. leave checks disabled
                    self.do_check = False
                    self.log.warning("Database %s does not exist. Disabling checks for this instance.", context)
                else:
                    # yes we do. Keep trying
                    msg = "Database {} does not exist. Please resolve invalid database and restart agent".format(
                        context
                    )
                    raise ConfigurationError(msg)

        except SQLConnectionError as e:
            self.log.exception("Error connecting to database: %s", e)
        except ConfigurationError:
            raise
        except Exception as e:
            self.log.exception("Initialization exception %s", e)

    def handle_service_check(self, status, host, database, message=None):
        custom_tags = self.instance.get("tags", [])
        if custom_tags is None:
            custom_tags = []
        service_check_tags = ['host:{}'.format(host), 'db:{}'.format(database)]
        service_check_tags.extend(custom_tags)
        service_check_tags = list(set(service_check_tags))

        self.service_check(SERVICE_CHECK_NAME, status, tags=service_check_tags, message=message, raw=True)

    def _compile_patterns(self):
        self._include_patterns = self._compile_valid_patterns(self.autodiscovery_include)
        self._exclude_patterns = self._compile_valid_patterns(self.autodiscovery_exclude)

    def _compile_valid_patterns(self, patterns):
        valid_patterns = []

        for pattern in patterns:
            # Ignore empty patterns as they match everything
            if not pattern:
                continue

            try:
                re.compile(pattern, re.IGNORECASE)
            except Exception:
                self.log.warning('%s is not a valid regular expression and will be ignored', pattern)
            else:
                valid_patterns.append(pattern)

        if valid_patterns:
            return re.compile('|'.join(valid_patterns), re.IGNORECASE)
        else:
            # create unmatchable regex - https://stackoverflow.com/a/1845097/2157429
            return re.compile(r'(?!x)x')

    def autodiscover_databases(self, cursor):
        if not self.autodiscovery:
            return False

        now = time.time()
        if now - self.ad_last_check > self.autodiscovery_interval:
            self.log.info('Performing database autodiscovery')
            cursor.execute(AUTODISCOVERY_QUERY)
            all_dbs = set(row.name for row in cursor.fetchall())
            excluded_dbs = set([d for d in all_dbs if self._exclude_patterns.match(d)])
            included_dbs = set([d for d in all_dbs if self._include_patterns.match(d)])

            self.log.debug(
                'Autodiscovered databases: %s, excluding: %s, including: %s', all_dbs, excluded_dbs, included_dbs
            )

            # keep included dbs but remove any that were explicitly excluded
            filtered_dbs = all_dbs.intersection(included_dbs) - excluded_dbs

            self.log.debug('Resulting filtered databases: %s', filtered_dbs)
            self.ad_last_check = now

            if filtered_dbs != self.databases:
                self.log.debug('Databases updated from previous autodiscovery check.')
                self.databases = filtered_dbs
                return True
        return False

    def _make_metric_list_to_collect(self, custom_metrics):
        """
        Store the list of metrics to collect by instance_key.
        Will also create and cache cursors to query the db.
        """

        metrics_to_collect = []
        tags = self.instance.get('tags', [])

        # Load instance-level (previously Performance) metrics
        # If several check instances are querying the same server host, it can be wise to turn these off
        # to avoid sending duplicate metrics
        if is_affirmative(self.instance.get('include_instance_metrics', True)):
            self._add_performance_counters(
                chain(INSTANCE_METRICS, INSTANCE_METRICS_TOTAL), metrics_to_collect, tags, db=None
            )

        # populated through autodiscovery
        if self.databases:
            for db in self.databases:
                self._add_performance_counters(INSTANCE_METRICS_TOTAL, metrics_to_collect, tags, db=db)

        # Load database statistics
        for name, table, column in DATABASE_METRICS:
            # include database as a filter option
            db_names = self.databases or [self.instance.get('database', self.connection.DEFAULT_DATABASE)]
            for db_name in db_names:
                cfg = {'name': name, 'table': table, 'column': column, 'instance_name': db_name, 'tags': tags}
                metrics_to_collect.append(self.typed_metric(cfg_inst=cfg, table=table, column=column))

        # Load AlwaysOn metrics
        if is_affirmative(self.instance.get('include_ao_metrics', False)):
            for name, table, column in AO_METRICS + AO_METRICS_PRIMARY + AO_METRICS_SECONDARY:
                db_name = 'master'
                cfg = {
                    'name': name,
                    'table': table,
                    'column': column,
                    'instance_name': db_name,
                    'tags': tags,
                    'ao_database': self.instance.get('ao_database', None),
                    'availability_group': self.instance.get('availability_group', None),
                    'only_emit_local': is_affirmative(self.instance.get('only_emit_local', False)),
                }
                metrics_to_collect.append(self.typed_metric(cfg_inst=cfg, table=table, column=column))

        # Load FCI metrics
        if is_affirmative(self.instance.get('include_fci_metrics', False)):
            for name, table, column in FCI_METRICS:
                cfg = {
                    'name': name,
                    'table': table,
                    'column': column,
                    'tags': tags,
                }
                metrics_to_collect.append(self.typed_metric(cfg_inst=cfg, table=table, column=column))

        # Load metrics from scheduler and task tables, if enabled
        if is_affirmative(self.instance.get('include_task_scheduler_metrics', False)):
            for name, table, column in TASK_SCHEDULER_METRICS:
                cfg = {'name': name, 'table': table, 'column': column, 'tags': tags}
                metrics_to_collect.append(self.typed_metric(cfg_inst=cfg, table=table, column=column))

        # Load DB Fragmentation metrics
        if is_affirmative(self.instance.get('include_db_fragmentation_metrics', False)):
            db_fragmentation_object_names = self.instance.get('db_fragmentation_object_names', [])
            db_names = self.databases or [self.instance.get('database', self.connection.DEFAULT_DATABASE)]

            if not db_fragmentation_object_names:
                self.log.debug(
                    "No fragmentation object names specified, will return fragmentation metrics for all "
                    "object_ids of current database(s): %s",
                    db_names,
                )

            for db_name in db_names:
                for name, table, column in DATABASE_FRAGMENTATION_METRICS:
                    cfg = {
                        'name': name,
                        'table': table,
                        'column': column,
                        'instance_name': db_name,
                        'tags': tags,
                        'db_fragmentation_object_names': db_fragmentation_object_names,
                    }
                    metrics_to_collect.append(self.typed_metric(cfg_inst=cfg, table=table, column=column))

        # Load any custom metrics from conf.d/sqlserver.yaml
        for cfg in custom_metrics:
            sql_type = None
            base_name = None

            custom_tags = tags + cfg.get('tags', [])
            cfg['tags'] = custom_tags

            db_table = cfg.get('table', DEFAULT_PERFORMANCE_TABLE)
            if db_table not in VALID_TABLES:
                self.log.error('%s has an invalid table name: %s', cfg['name'], db_table)
                continue

            if cfg.get('database', None) and cfg.get('database') != self.instance.get('database'):
                self.log.debug(
                    'Skipping custom metric %s for database %s, check instance configured for database %s',
                    cfg['name'],
                    cfg.get('database'),
                    self.instance.get('database'),
                )
                continue

            if db_table == DEFAULT_PERFORMANCE_TABLE:
                user_type = cfg.get('type')
                if user_type is not None and user_type not in VALID_METRIC_TYPES:
                    self.log.error('%s has an invalid metric type: %s', cfg['name'], user_type)
                sql_type = None
                try:
                    if user_type is None:
                        sql_type, base_name = self.get_sql_type(cfg['counter_name'])
                except Exception:
                    self.log.warning("Can't load the metric %s, ignoring", cfg['name'], exc_info=True)
                    continue

                metrics_to_collect.append(
                    self.typed_metric(
                        cfg_inst=cfg, table=db_table, base_name=base_name, user_type=user_type, sql_type=sql_type
                    )
                )

            else:
                for column in cfg['columns']:
                    metrics_to_collect.append(
                        self.typed_metric(
                            cfg_inst=cfg, table=db_table, base_name=base_name, sql_type=sql_type, column=column
                        )
                    )

        self.instance_metrics = metrics_to_collect
        self.log.debug("metrics to collect %s", metrics_to_collect)

        # create an organized grouping of metric names to their metric classes
        for m in metrics_to_collect:
            cls = m.__class__.__name__
            name = m.sql_name or m.column
            self.log.debug("Adding metric class %s named %s", cls, name)

            self.instance_per_type_metrics[cls].append(name)
            if m.base_name:
                self.instance_per_type_metrics[cls].append(m.base_name)

    def _add_performance_counters(self, metrics, metrics_to_collect, tags, db=None):
        for name, counter_name, instance_name in metrics:
            try:
                sql_type, base_name = self.get_sql_type(counter_name)
                cfg = {
                    'name': name,
                    'counter_name': counter_name,
                    'instance_name': db or instance_name,
                    'tags': tags,
                }

                metrics_to_collect.append(
                    self.typed_metric(
                        cfg_inst=cfg, table=DEFAULT_PERFORMANCE_TABLE, base_name=base_name, sql_type=sql_type
                    )
                )
            except SQLConnectionError:
                raise
            except Exception:
                self.log.warning("Can't load the metric %s, ignoring", name, exc_info=True)
                continue

    def get_sql_type(self, counter_name):
        """
        Return the type of the performance counter so that we can report it to
        Datadog correctly
        If the sql_type is one that needs a base (PERF_RAW_LARGE_FRACTION and
        PERF_AVERAGE_BULK), the name of the base counter will also be returned
        """
        with self.connection.get_managed_cursor() as cursor:
            cursor.execute(COUNTER_TYPE_QUERY, (counter_name,))
            (sql_type,) = cursor.fetchone()
            if sql_type == PERF_LARGE_RAW_BASE:
                self.log.warning("Metric %s is of type Base and shouldn't be reported this way", counter_name)
            base_name = None
            if sql_type in [PERF_AVERAGE_BULK, PERF_RAW_LARGE_FRACTION]:
                # This is an ugly hack. For certain types of metric (PERF_RAW_LARGE_FRACTION
                # and PERF_AVERAGE_BULK), we need two metrics: the metric specified and
                # a base metric to compute the ratio. There is no unique schema, so we generate
                # the possible candidates and look at which ones exist in the db.
                candidates = (
                    counter_name + " base",
                    counter_name.replace("(ms)", "base"),
                    counter_name.replace("Avg ", "") + " base",
                )
                try:
                    cursor.execute(BASE_NAME_QUERY, candidates)
                    base_name = cursor.fetchone().counter_name.strip()
                    self.log.debug("Got base metric: %s for metric: %s", base_name, counter_name)
                except Exception as e:
                    self.log.warning("Could not get counter_name of base for metric: %s", e)

        return sql_type, base_name

    def typed_metric(self, cfg_inst, table, base_name=None, user_type=None, sql_type=None, column=None):
        """
        Create the appropriate BaseSqlServerMetric object, each implementing its method to
        fetch the metrics properly.
        If a `type` was specified in the config, it is used to report the value
        directly fetched from SQLServer. Otherwise, it is decided based on the
        sql_type, according to microsoft's documentation.
        """
        if table == DEFAULT_PERFORMANCE_TABLE:
            metric_type_mapping = {
                PERF_COUNTER_BULK_COUNT: (self.rate, metrics.SqlSimpleMetric),
                PERF_COUNTER_LARGE_RAWCOUNT: (self.gauge, metrics.SqlSimpleMetric),
                PERF_LARGE_RAW_BASE: (self.gauge, metrics.SqlSimpleMetric),
                PERF_RAW_LARGE_FRACTION: (self.gauge, metrics.SqlFractionMetric),
                PERF_AVERAGE_BULK: (self.gauge, metrics.SqlIncrFractionMetric),
            }
            if user_type is not None:
                # user type overrides any other value
                metric_type = getattr(self, user_type)
                cls = metrics.SqlSimpleMetric

            else:
                metric_type, cls = metric_type_mapping[sql_type]
        else:
            # Lookup metrics classes by their associated table
            metric_type_str, cls = metrics.TABLE_MAPPING[table]
            metric_type = getattr(self, metric_type_str)

        return cls(cfg_inst, base_name, metric_type, column, self.log)

    def check(self, _):
        if self.do_check:
            if self.proc:
                self.do_stored_procedure_check()
            else:
                self.collect_metrics()
        else:
            self.log.debug("Skipping check")

    def collect_metrics(self):
        """Fetch the metrics from all of the associated database tables."""

        with self.connection.open_managed_default_connection():
            with self.connection.get_managed_cursor() as cursor:
                # initiate autodiscovery, or rebuild the metric list if it is missing
                # (e.g. the server was down at check __init__).
                if self.autodiscover_databases(cursor) or not self.instance_metrics:
                    self._make_metric_list_to_collect(self.custom_metrics)

                instance_results = {}

                # Execute the `fetch_all` operations first to minimize the database calls
                for cls, metric_names in six.iteritems(self.instance_per_type_metrics):
                    if not metric_names:
                        instance_results[cls] = None, None
                    else:
                        try:
                            rows, cols = getattr(metrics, cls).fetch_all_values(cursor, metric_names, self.log)
                        except Exception as e:
                            self.log.error("Error running `fetch_all` for metrics %s - skipping.  Error: %s", cls, e)
                            rows, cols = None, None

                        instance_results[cls] = rows, cols

                # Using the cached data, extract and report individual metrics
                for metric in self.instance_metrics:
                    if type(metric) is metrics.SqlIncrFractionMetric:
                        # special case, since it uses the same results as SqlFractionMetric
                        rows, cols = instance_results['SqlFractionMetric']
                        if rows is not None:
                            metric.fetch_metric(rows, cols)
                    else:
                        rows, cols = instance_results[metric.__class__.__name__]
                        if rows is not None:
                            metric.fetch_metric(rows, cols)

            # reuse connection for any custom queries
            self._query_manager.execute()

    def execute_query_raw(self, query):
        with self.connection.get_managed_cursor() as cursor:
            cursor.execute(query)
            return cursor.fetchall()

    def do_stored_procedure_check(self):
        """
        Fetch the metrics from the stored proc
        """

        proc = self.proc
        guardSql = self.instance.get('proc_only_if')
        custom_tags = self.instance.get("tags", [])

        if (guardSql and self.proc_check_guard(guardSql)) or not guardSql:
            self.connection.open_db_connections(self.connection.DEFAULT_DB_KEY)
            cursor = self.connection.get_cursor(self.connection.DEFAULT_DB_KEY)

            try:
                self.log.debug("Calling Stored Procedure : %s", proc)
                if self.connection.get_connector() == 'adodbapi':
                    cursor.callproc(proc)
                else:
                    # pyodbc does not support callproc; use execute instead.
                    # Reference: https://github.com/mkleehammer/pyodbc/wiki/Calling-Stored-Procedures
                    call_proc = '{{CALL {}}}'.format(proc)
                    cursor.execute(call_proc)

                rows = cursor.fetchall()
                self.log.debug("Row count (%s) : %s", proc, cursor.rowcount)

                for row in rows:
                    tags = [] if row.tags is None or row.tags == '' else row.tags.split(',')
                    tags.extend(custom_tags)

                    if row.type.lower() in self.proc_type_mapping:
                        self.proc_type_mapping[row.type](row.metric, row.value, tags, raw=True)
                    else:
                        self.log.warning(
                            '%s is not a recognised type from procedure %s, metric %s', row.type, proc, row.metric
                        )

            except Exception as e:
                self.log.warning("Could not call procedure %s: %s", proc, e)
                raise e

            self.connection.close_cursor(cursor)
            self.connection.close_db_connections(self.connection.DEFAULT_DB_KEY)
        else:
            self.log.info("Skipping call to %s due to only_if", proc)

    def proc_check_guard(self, sql):
        """
        Check whether the guard SQL returns a single column containing 0 or 1.
        Return True if 1, else False.
        """
        self.connection.open_db_connections(self.connection.PROC_GUARD_DB_KEY)
        cursor = self.connection.get_cursor(self.connection.PROC_GUARD_DB_KEY)

        should_run = False
        try:
            cursor.execute(sql, ())
            result = cursor.fetchone()
            should_run = result[0] == 1
        except Exception as e:
            self.log.error("Failed to run proc_only_if sql %s : %s", sql, e)

        self.connection.close_cursor(cursor)
        self.connection.close_db_connections(self.connection.PROC_GUARD_DB_KEY)
        return should_run
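
For reference, the autodiscovery filtering in the SQLServer check above reduces to OR-joining the valid patterns into one case-insensitive regex, falling back to the unmatchable `(?!x)x` when nothing is configured, then intersecting and subtracting the matches. A minimal sketch of that behavior with hypothetical pattern and database names:

import re

# Same idea as _compile_valid_patterns: join patterns with '|' (case-insensitive),
# or fall back to a regex that can never match when no patterns are configured.
include_patterns = re.compile('|'.join(['^sales_', '^hr_']), re.IGNORECASE)
exclude_patterns = re.compile(r'(?!x)x')  # matches nothing

# Hypothetical autodiscovered database names.
all_dbs = {'Sales_EU', 'HR_Prod', 'tempdb'}
included_dbs = {d for d in all_dbs if include_patterns.match(d)}
excluded_dbs = {d for d in all_dbs if exclude_patterns.match(d)}
filtered_dbs = all_dbs.intersection(included_dbs) - excluded_dbs
print(sorted(filtered_dbs))  # -> ['HR_Prod', 'Sales_EU']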
Example #29
0
class RethinkDBCheck(AgentCheck):
    """
    Collect metrics from a RethinkDB cluster.
    """

    __NAMESPACE__ = 'rethinkdb'
    SERVICE_CHECK_CONNECT = 'can_connect'

    def __init__(self, *args, **kwargs):
        # type: (*Any, **Any) -> None
        super(RethinkDBCheck, self).__init__(*args, **kwargs)

        self._config = Config(cast(Instance, self.instance))

        if self._config.password:
            self.register_secret(self._config.password)

        self._conn = None  # type: Optional[rethinkdb.net.Connection]

        manager_queries = [
            queries.ClusterMetrics,
            queries.ServerMetrics,
            queries.DatabaseConfigMetrics,
            queries.DatabaseTableMetrics,
            queries.TableConfigMetrics,
            queries.ReplicaMetrics,
            queries.ShardMetrics,
            queries.JobMetrics,
            queries.CurrentIssuesMetrics,
        ]  # type: list

        if self.is_metadata_collection_enabled:
            manager_queries.append(queries.VersionMetadata)

        self._query_manager = QueryManager(
            self,
            executor=self._execute_raw_query,
            queries=manager_queries,
            tags=self._config.tags,
        )
        self._query_funcs = {}  # type: Dict[str, Callable]

        self.check_initializations.append(self._query_manager.compile_queries)

    def _execute_raw_query(self, query):
        # type: (str) -> List[tuple]
        query_func = self._query_funcs.get(query)

        if query_func is None:
            # QueryManager only supports `str` queries.
            # So here's the workaround: we make `query` refer to the import paths of query functions, then import here.
            # Cache the results so imports only happen on the first check run.
            module_name, _, func_name = query.partition(':')
            module = importlib.import_module(module_name, package='datadog_checks.rethinkdb')
            query_func = getattr(module, func_name)
            self._query_funcs[query] = query_func

        return query_func(self._conn)

    @contextmanager
    def connect_submitting_service_checks(self):
        # type: () -> Iterator[None]
        config = self._config
        tags = config.service_check_tags

        try:
            with rethinkdb.r.connect(
                host=config.host,
                port=config.port,
                user=config.user,
                password=config.password,
                ssl={'ca_certs': config.tls_ca_cert} if config.tls_ca_cert is not None else {},
            ) as conn:
                self._conn = conn
                yield
        except rethinkdb.errors.ReqlDriverError as exc:
            message = 'Could not connect to RethinkDB server: {!r}'.format(exc)
            self.log.error(message)
            self.service_check(self.SERVICE_CHECK_CONNECT, self.CRITICAL, tags=tags, message=message)
            raise
        except Exception as exc:
            message = 'Unexpected error while executing RethinkDB check: {!r}'.format(exc)
            self.log.error(message)
            self.service_check(self.SERVICE_CHECK_CONNECT, self.CRITICAL, tags=tags, message=message)
            raise
        else:
            self.service_check(self.SERVICE_CHECK_CONNECT, self.OK, tags=tags)
        finally:
            self._conn = None

    def collect_metrics(self):  # Exposed for mocking purposes.
        # type: () -> None
        self._query_manager.execute()

    def check(self, instance):
        # type: (Any) -> None
        with self.connect_submitting_service_checks():
            self.collect_metrics()
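
For reference, the comment inside `_execute_raw_query` above describes encoding each query as a `module:function` import path that is resolved lazily and cached. A minimal sketch of that resolution mechanism; it borrows a standard-library function purely for illustration, since the real query strings point at the integration's own modules:

import importlib

def resolve_query_func(query):
    # Same mechanism as _execute_raw_query: split 'module:function' and import.
    module_name, _, func_name = query.partition(':')
    module = importlib.import_module(module_name)
    return getattr(module, func_name)

# Illustrative only: a stdlib function instead of a real RethinkDB query module.
join = resolve_query_func('os.path:join')
print(join('tmp', 'example'))  # -> 'tmp/example' on POSIX systems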
Example #30
0
class ClickhouseCheck(AgentCheck):
    __NAMESPACE__ = 'clickhouse'
    SERVICE_CHECK_CONNECT = 'can_connect'

    def __init__(self, name, init_config, instances):
        super(ClickhouseCheck, self).__init__(name, init_config, instances)

        self._server = self.instance.get('server', '')
        self._port = self.instance.get('port')
        self._db = self.instance.get('db', 'default')
        self._user = self.instance.get('user', 'default')
        self._password = self.instance.get('password', '')
        self._connect_timeout = float(self.instance.get('connect_timeout', 10))
        self._read_timeout = float(self.instance.get('read_timeout', 10))
        self._compression = self.instance.get('compression', False)
        self._tls_verify = is_affirmative(
            self.instance.get('tls_verify', False))
        self._tags = self.instance.get('tags', [])

        # Add global tags
        self._tags.append('server:{}'.format(self._server))
        self._tags.append('port:{}'.format(self._port))
        self._tags.append('db:{}'.format(self._db))

        self._error_sanitizer = ErrorSanitizer(self._password)
        self.check_initializations.append(self.validate_config)

        # We'll connect on the first check run
        self._client = None
        self.check_initializations.append(self.create_connection)

        self._query_manager = QueryManager(
            self,
            self.execute_query_raw,
            queries=[
                queries.SystemMetrics,
                queries.SystemEvents,
                queries.SystemAsynchronousMetrics,
                queries.SystemParts,
                queries.SystemReplicas,
                queries.SystemDictionaries,
            ],
            tags=self._tags,
            error_handler=self._error_sanitizer.clean,
        )
        self.check_initializations.append(self._query_manager.compile_queries)

    def check(self, _):
        self._query_manager.execute()
        self.collect_version()

    def collect_version(self):
        version = list(self.execute_query_raw('SELECT version()'))[0][0]

        # The version comes in like `19.15.2.2` though sometimes there is no patch part
        version_parts = {
            name: part
            for name, part in zip(('year', 'major', 'minor',
                                   'patch'), version.split('.'))
        }

        self.set_metadata('version',
                          version,
                          scheme='parts',
                          final_scheme='calver',
                          part_map=version_parts)

    def execute_query_raw(self, query):
        return self._client.execute_iter(query)

    def validate_config(self):
        if not self._server:
            raise ConfigurationError('the `server` setting is required')

    def create_connection(self):
        try:
            client = clickhouse_driver.Client(
                host=self._server,
                port=self._port,
                user=self._user,
                password=self._password,
                database=self._db,
                connect_timeout=self._connect_timeout,
                send_receive_timeout=self._read_timeout,
                sync_request_timeout=self._connect_timeout,
                compression=self._compression,
                secure=self._tls_verify,
                # Don't pollute the Agent logs
                settings={'calculate_text_stack_trace': False},
                # Make every client unique for server logs
                client_name='datadog-{}'.format(self.check_id),
            )
            client.connection.connect()
        except Exception as e:
            error = 'Unable to connect to ClickHouse: {}'.format(
                self._error_sanitizer.clean(self._error_sanitizer.scrub(
                    str(e))))
            self.service_check(self.SERVICE_CHECK_CONNECT,
                               self.CRITICAL,
                               message=error,
                               tags=self._tags)

            # When an exception is raised in the context of another one, both will be printed. To avoid
            # this we set the context to None. https://www.python.org/dev/peps/pep-0409/
            raise_from(type(e)(error), None)
        else:
            self.service_check(self.SERVICE_CHECK_CONNECT,
                               self.OK,
                               tags=self._tags)
            self._client = client
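
For reference, `collect_version` above maps the dotted ClickHouse version onto calver-style parts and tolerates a missing patch component, because `zip` stops at the shorter sequence. A small standalone sketch of that mapping; the version strings are illustrative:

# Same mapping as collect_version: zip part names with the dotted version,
# so a missing patch component simply drops out of the resulting dict.
def split_version(version):
    return {
        name: part
        for name, part in zip(('year', 'major', 'minor', 'patch'), version.split('.'))
    }

print(split_version('19.15.2.2'))  # {'year': '19', 'major': '15', 'minor': '2', 'patch': '2'}
print(split_version('21.8.3'))     # {'year': '21', 'major': '8', 'minor': '3'}  (no patch part)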