Example #1
    def check_health_v1(self, config, tags):
        url = config['api_url'] + '/sys/health'
        health_data = self.access_api(url, config, tags).json()

        cluster_name = health_data.get('cluster_name')
        if cluster_name:
            tags.append('cluster_name:{}'.format(cluster_name))

        vault_version = health_data.get('version')
        if vault_version:
            tags.append('vault_version:{}'.format(vault_version))

        unsealed = not is_affirmative(health_data.get('sealed'))
        if unsealed:
            self.service_check(self.SERVICE_CHECK_UNSEALED,
                               AgentCheck.OK,
                               tags=tags)
        else:
            self.service_check(self.SERVICE_CHECK_UNSEALED,
                               AgentCheck.CRITICAL,
                               tags=tags)

        initialized = is_affirmative(health_data.get('initialized'))
        if initialized:
            self.service_check(self.SERVICE_CHECK_INITIALIZED,
                               AgentCheck.OK,
                               tags=tags)
        else:
            self.service_check(self.SERVICE_CHECK_INITIALIZED,
                               AgentCheck.CRITICAL,
                               tags=tags)
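
For reference, a sketch of the `/sys/health` payload that check_health_v1 parses; the field names match the keys read above, the values are illustrative:

    health_data = {
        'cluster_name': 'vault-cluster-abc123',  # appended as a cluster_name tag when present
        'version': '1.0.2',                      # appended as a vault_version tag when present
        'sealed': False,                         # negated into the unsealed service check
        'initialized': True,                     # drives the initialized service check
    }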
Example #2
    def check(self, instance):
        try:
            directory = instance['directory']
        except KeyError:
            raise Exception('DirectoryCheck: missing `directory` in config')

        abs_directory = abspath(directory)
        name = instance.get('name', directory)
        pattern = instance.get('pattern')
        exclude_dirs = instance.get('exclude_dirs', [])
        exclude_dirs_pattern = re_compile(
            '|'.join(exclude_dirs)) if exclude_dirs else None
        recursive = is_affirmative(instance.get('recursive', False))
        dirtagname = instance.get('dirtagname', 'name')
        filetagname = instance.get('filetagname', 'filename')
        filegauges = is_affirmative(instance.get('filegauges', False))
        countonly = is_affirmative(instance.get('countonly', False))
        ignore_missing = is_affirmative(instance.get('ignore_missing', False))
        custom_tags = instance.get('tags', [])

        if not exists(abs_directory):
            if ignore_missing:
                self.log.info(
                    'DirectoryCheck: the directory `{}` does not exist. Skipping.'
                    .format(abs_directory))
                return

            raise Exception(
                'DirectoryCheck: the directory `{}` does not exist'.format(
                    abs_directory))

        self._get_stats(abs_directory, name, dirtagname, filetagname,
                        filegauges, pattern, exclude_dirs_pattern, recursive,
                        countonly, custom_tags)
Example #3
    def check(self, instance):
        try:
            directory = instance['directory']
        except KeyError:
            raise ConfigurationError(
                'DirectoryCheck: missing `directory` in config')

        abs_directory = abspath(directory)
        name = instance.get('name', directory)
        pattern = instance.get('pattern')
        exclude_dirs = instance.get('exclude_dirs', [])
        exclude_dirs_pattern = re_compile(
            '|'.join(exclude_dirs)) if exclude_dirs else None
        dirs_patterns_full = is_affirmative(
            instance.get('dirs_patterns_full', False))
        recursive = is_affirmative(instance.get('recursive', False))
        dirtagname = instance.get('dirtagname', 'name')
        filetagname = instance.get('filetagname', 'filename')
        filegauges = is_affirmative(instance.get('filegauges', False))
        countonly = is_affirmative(instance.get('countonly', False))
        ignore_missing = is_affirmative(instance.get('ignore_missing', False))
        custom_tags = instance.get('tags', [])

        if not exists(abs_directory):
            msg = "Either directory '{}' doesn't exist or the Agent doesn't "\
                  "have permissions to access it, skipping.".format(abs_directory)

            if not ignore_missing:
                raise ConfigurationError(msg)

            self.log.warning(msg)

        self._get_stats(abs_directory, name, dirtagname, filetagname,
                        filegauges, pattern, exclude_dirs_pattern,
                        dirs_patterns_full, recursive, countonly, custom_tags)
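
For illustration, a hypothetical instance configuration exercising the options parsed in Examples #2 and #3; the key names mirror the instance.get(...) calls above, the values are made up:

    instance = {
        'directory': '/var/log/myapp',          # required; a missing key raises ConfigurationError
        'name': 'myapp-logs',                   # tag value, defaults to `directory`
        'pattern': '*.log',                     # optional filename pattern
        'exclude_dirs': ['archive', r'tmp.*'],  # joined with `|` into a single exclusion regex
        'dirs_patterns_full': False,            # match exclusion patterns against full paths
        'recursive': True,
        'countonly': False,
        'ignore_missing': True,                 # warn instead of raising when the directory is absent
        'tags': ['env:dev'],
    }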
Example #4
    def __init__(self, instance):
        self.channel = instance.get('channel')
        self.queue_manager_name = instance.get('queue_manager', 'default')

        self.host = instance.get('host', 'localhost')
        self.port = instance.get('port', '1414')
        self.host_and_port = "{}({})".format(self.host, self.port)

        self.username = instance.get('username')
        self.password = instance.get('password')

        self.queues = instance.get('queues', [])
        self.queue_patterns = instance.get('queue_patterns', [])

        self.custom_tags = instance.get('tags', [])

        self.auto_discover_queues = is_affirmative(
            instance.get('auto_discover_queues', False))

        self.ssl = is_affirmative(instance.get('ssl_auth', False))
        self.ssl_cipher_spec = instance.get('ssl_cipher_spec',
                                            'TLS_RSA_WITH_AES_256_CBC_SHA')

        self.key_repository_location = instance.get(
            'ssl_key_repository_location',
            '/var/mqm/ssl-db/client/KeyringClient')

        self.mq_installation_dir = instance.get('mq_installation_dir',
                                                '/opt/mqm/')
Example #5
    def __init__(self, name, init_config, agentConfig, instances=None):
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        self._ssl_verify = is_affirmative(init_config.get("ssl_verify", True))
        self.keystone_server_url = init_config.get("keystone_server_url")
        self.hypervisor_name_cache = {}

        self.paginated_server_limit = init_config.get(
            'paginated_server_limit') or DEFAULT_PAGINATED_SERVER_LIMIT

        self.request_timeout = init_config.get(
            'request_timeout') or DEFAULT_API_REQUEST_TIMEOUT

        if not self.keystone_server_url:
            raise IncompleteConfig()

        # Current authentication scopes
        self._parent_scope = None
        self._current_scope = None

        # Cache some things between runs for values that change rarely
        self._aggregate_list = None

        # Mapping of check instances to associated OpenStack project scopes
        self.instance_map = {}

        # Mapping of Nova-managed servers to tags
        self.external_host_tags = {}

        self.exclude_network_id_rules = set([
            re.compile(ex) for ex in init_config.get('exclude_network_ids', [])
        ])
        self.exclude_server_id_rules = set([
            re.compile(ex) for ex in init_config.get('exclude_server_ids', [])
        ])
        self.include_project_name_rules = set([
            re.compile(ex)
            for ex in init_config.get('whitelist_project_names', [])
        ])
        self.exclude_project_name_rules = set([
            re.compile(ex)
            for ex in init_config.get('blacklist_project_names', [])
        ])

        skip_proxy = not is_affirmative(
            init_config.get('use_agent_proxy', True))
        self.proxy_config = None if skip_proxy else self.proxies

        self.backoff = BackOffRetry(self)

        # ISO8601 date time: used to filter the call to get the list of nova servers
        self.changes_since_time = {}

        # Ex: server_details_by_id = {
        #   UUID: {UUID: <value>, etc}
        #   1: {id: 1, name: hostA},
        #   2: {id: 2, name: hostB}
        # }
        self.server_details_by_id = {}
Example #6
    def _check_for_leader_change(self, instance, instance_state):
        perform_new_leader_checks = is_affirmative(instance.get('new_leader_checks',
                                                                self.init_config.get('new_leader_checks', False)))
        perform_self_leader_check = is_affirmative(instance.get('self_leader_check',
                                                                self.init_config.get('self_leader_check', False)))

        if perform_new_leader_checks and perform_self_leader_check:
            self.log.warning('Both self_leader_check and new_leader_checks are set, '
                             'ignoring new_leader_checks')
        elif not perform_new_leader_checks and not perform_self_leader_check:
            # Nothing to do here
            return

        leader = self._get_cluster_leader(instance)

        if not leader:
            # A few things could be happening here.
            #   1. Consul Agent is Down
            #   2. The cluster is in the midst of a leader election
            #   3. The Datadog agent is not able to reach the Consul instance (network partition et al.)
            self.log.warning('Consul Leader information is not available!')
            return

        if not instance_state.last_known_leader:
            # We have no state preserved, store some and return
            instance_state.last_known_leader = leader
            return

        agent = self._get_agent_url(instance, instance_state)
        agent_dc = self._get_agent_datacenter(instance, instance_state)

        if leader != instance_state.last_known_leader:
            # There was a leadership change
            if perform_new_leader_checks or (perform_self_leader_check and agent == leader):
                # We either emit all leadership changes or emit when we become the leader and that just happened
                self.log.info(('Leader change from {0} to {1}. Sending new leader event').format(
                    instance_state.last_known_leader, leader))

                self.event({
                    "timestamp": int(datetime.now().strftime("%s")),
                    "event_type": "consul.new_leader",
                    "source_type_name": self.SOURCE_TYPE_NAME,
                    "msg_title": "New Consul Leader Elected in consul_datacenter:{0}".format(agent_dc),
                    "aggregation_key": "consul.new_leader",
                    "msg_text": "The Node at {0} is the new leader of the consul datacenter {1}".format(
                        leader,
                        agent_dc
                    ),
                    "tags": ["prev_consul_leader:{0}".format(instance_state.last_known_leader),
                             "curr_consul_leader:{0}".format(leader),
                             "consul_datacenter:{0}".format(agent_dc)]
                })

        instance_state.last_known_leader = leader
Example #7
    def get_config(self, instance):
        instance_id = hash_mutable(instance)
        config = self.config.get(instance_id)
        if config is None:
            config = {}

            try:
                api_url = instance['api_url']
                api_version = api_url[-1]
                if api_version not in self.api_versions:
                    self.log.warning(
                        'Unknown Vault API version `{}`, using version '
                        '`{}`'.format(api_version, self.DEFAULT_API_VERSION))

                config['api_url'] = api_url
                # Fall back to the default version's function table when the
                # URL ends in an unknown version
                config['api'] = self.api_versions.get(
                    api_version,
                    self.api_versions[self.DEFAULT_API_VERSION])['functions']
            except KeyError:
                self.log.error(
                    'Vault configuration setting `api_url` is required')
                return

            client_token = instance.get('client_token')
            config['headers'] = {
                'X-Vault-Token': client_token
            } if client_token else None

            username = instance.get('username')
            password = instance.get('password')
            config['auth'] = (username,
                              password) if username and password else None

            config['ssl_verify'] = is_affirmative(
                instance.get('ssl_verify', True))
            config['ssl_ignore_warning'] = is_affirmative(
                instance.get('ssl_ignore_warning', False))
            config['proxies'] = self.get_instance_proxy(
                instance, config['api_url'])
            config['timeout'] = int(instance.get('timeout', 20))
            config['tags'] = instance.get('tags', [])

            # Keep track of the previous cluster leader to detect changes.
            config['leader'] = None
            config['detect_leader'] = is_affirmative(
                instance.get('detect_leader'))

            self.config[instance_id] = config

        return config
Example #8
    def check_leader_v1(self, config, tags):
        url = config['api_url'] + '/sys/leader'
        leader_data = self.access_api(url, config, tags).json()

        is_leader = is_affirmative(leader_data.get('is_self'))
        tags.append('is_leader:{}'.format('true' if is_leader else 'false'))

        self.gauge('vault.is_leader', int(is_leader), tags=tags)

        current_leader = leader_data.get('leader_address')
        previous_leader = config['leader']
        if config['detect_leader'] and current_leader:
            if previous_leader is not None and current_leader != previous_leader:
                self.event({
                    'timestamp': timestamp(),
                    'event_type': self.EVENT_LEADER_CHANGE,
                    'msg_title': 'Leader change',
                    'msg_text': 'Leader changed from `{}` to `{}`.'.format(
                        previous_leader, current_leader),
                    'alert_type': 'info',
                    'source_type_name': self.CHECK_NAME,
                    'host': self.hostname,
                    'tags': tags,
                })
            config['leader'] = current_leader
Example #9
    def check(self, instance):
        host = instance.get('host', '')
        port = instance.get('port', '')
        user = instance.get('username', '')
        password = instance.get('password', '')
        tags = instance.get('tags', [])
        database_url = instance.get('database_url')
        use_cached = is_affirmative(instance.get('use_cached', True))

        if database_url:
            key = database_url
        else:
            key = '%s:%s' % (host, port)

        if tags is None:
            tags = []
        else:
            tags = list(set(tags))

        try:
            db = self._get_connection(key, host, port, user, password, tags=tags,
                                      database_url=database_url, use_cached=use_cached)
            self._collect_stats(db, tags)
        except ShouldRestartException:
            self.log.info("Resetting the connection")
            db = self._get_connection(key, host, port, user, password, tags=tags,
                                      database_url=database_url, use_cached=False)
            self._collect_stats(db, tags)

        redacted_dsn = self._get_redacted_dsn(host, port, user, database_url)
        message = u'Established connection to {}'.format(redacted_dsn)
        self.service_check(self.SERVICE_CHECK_NAME, AgentCheck.OK,
                           tags=self._get_service_checks_tags(host, port, database_url, tags),
                           message=message)
Example #10
    def _cache_morlist_raw(self, instance):
        """
        Fill the Mor objects queue that will be asynchronously processed later.
        Resolve the vCenter `rootFolder` and initiate hosts and virtual machines
        discovery.
        """
        i_key = self._instance_key(instance)
        self.log.debug("Caching the morlist for vcenter instance %s" % i_key)

        # If the queue is not completely empty, don't do anything
        for resource_type in RESOURCE_TYPE_METRICS:
            if self.mor_objects_queue.contains(i_key) and self.mor_objects_queue.size(i_key, resource_type):
                last = self.cache_config.get_last(CacheConfig.Morlist, i_key)
                self.log.debug("Skipping morlist collection: the objects queue for the "
                               "resource type '{}' is still being processed "
                               "(latest refresh was {}s ago)".format(resource_type, time.time() - last))
                return

        instance_tag = "vcenter_server:%s" % instance.get('name')
        regexes = {
            'host_include': instance.get('host_include_only_regex'),
            'vm_include': instance.get('vm_include_only_regex')
        }
        include_only_marked = is_affirmative(instance.get('include_only_marked', False))

        # Discover hosts and virtual machines
        self.pool.apply_async(
            self._cache_morlist_raw_async,
            args=(instance, [instance_tag], regexes, include_only_marked)
        )

        self.cache_config.set_last(CacheConfig.Morlist, i_key, time.time())
Example #11
    def create(logger, proxies, instance_config):
        ssl_verify = is_affirmative(instance_config.get("ssl_verify", True))
        paginated_limit = instance_config.get('paginated_limit',
                                              DEFAULT_PAGINATED_LIMIT)
        request_timeout = instance_config.get('request_timeout',
                                              DEFAULT_API_REQUEST_TIMEOUT)
        user = instance_config.get("user")
        openstack_config_file_path = instance_config.get(
            "openstack_config_file_path")
        openstack_cloud_name = instance_config.get("openstack_cloud_name")

        # If an OpenStack configuration is specified, an OpenstackSDKApi is created and
        # authentication is done directly from the OpenStack configuration file
        if openstack_cloud_name is None:
            keystone_server_url = instance_config.get("keystone_server_url")
            api = SimpleApi(
                logger,
                keystone_server_url,
                timeout=request_timeout,
                ssl_verify=ssl_verify,
                proxies=proxies,
                limit=paginated_limit,
            )
            api.connect(user)
        else:
            api = OpenstackSDKApi(logger)
            api.connect(openstack_config_file_path, openstack_cloud_name)

        return api
Example #12
    def get_network_stats(self, tags):
        """
        Collect stats for all reachable networks
        """

        # FIXME: (aaditya) Check all networks defaults to true
        # until we can reliably assign agents to networks to monitor
        if is_affirmative(self.init_config.get('check_all_networks', True)):
            all_network_ids = set(self.get_all_network_ids())

            # Filter out excluded networks
            network_ids = [
                network_id for network_id in all_network_ids if not any([
                    re.match(exclude_id, network_id)
                    for exclude_id in self.exclude_network_id_rules
                ])
            ]
        else:
            network_ids = self.init_config.get('network_ids', [])

        if not network_ids:
            self.warning(
                "Your check is not configured to monitor any networks.\n" +
                "Please list `network_ids` under your init_config")

        for nid in network_ids:
            self.get_stats_for_single_network(nid, tags)
Example #13
    def check(self, instance):
        # Get the configuration for this specific instance
        scraper_config = self.get_scraper_config(instance)

        # Populate the metric transformers dict
        transformers = {}
        limiters = self.DEFAUT_RATE_LIMITERS + instance.get(
            "extra_limiters", [])
        for limiter in limiters:
            transformers[limiter + "_rate_limiter_use"] = self.rate_limiter_use
        queues = self.DEFAULT_QUEUES + instance.get("extra_queues", [])
        for queue in queues:
            for metric, func in iteritems(self.QUEUE_METRICS_TRANSFORMERS):
                transformers[queue + metric] = func

        # Support new metrics (introduced in v1.14.0)
        for metric_name in self.WORKQUEUE_METRICS_RENAMING:
            transformers[metric_name] = self.workqueue_transformer

        self.ignore_deprecated_metrics = instance.get(
            "ignore_deprecated", self.DEFAULT_IGNORE_DEPRECATED)
        if self.ignore_deprecated_metrics:
            self._filter_metric = self._ignore_deprecated_metric

        self.process(scraper_config, metric_transformers=transformers)

        # Check the leader-election status
        if is_affirmative(instance.get('leader_election', True)):
            leader_config = self.LEADER_ELECTION_CONFIG
            leader_config["tags"] = instance.get("tags", [])
            self.check_election_status(leader_config)
Example #14
    def get_config(self, instance):
        instance_id = hash_mutable(instance)
        config = self.config.get(instance_id)
        if config is None:
            config = {}

            try:
                api_url = instance['api_url']
                api_version = api_url[-1]
                if api_version not in self.api_versions:
                    self.log.warning(
                        'Unknown Vault API version `%s`, using version `%s`',
                        api_version, self.DEFAULT_API_VERSION)
                    api_url = api_url[:-1] + self.DEFAULT_API_VERSION
                    api_version = self.DEFAULT_API_VERSION

                config['api_url'] = api_url
                config['api'] = self.api_versions[api_version]['functions']
            except KeyError:
                self.log.error(
                    'Vault configuration setting `api_url` is required')
                return

            config['tags'] = instance.get('tags', [])

            # Keep track of the previous cluster leader to detect changes.
            config['leader'] = None
            config['detect_leader'] = is_affirmative(
                instance.get('detect_leader'))

            self.config[instance_id] = config

        return config
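
To make the version fallback in Example #14 concrete, a standalone sketch; the shape of api_versions and the value of DEFAULT_API_VERSION are assumptions inferred from how the snippets index them:

    DEFAULT_API_VERSION = '1'
    api_versions = {'1': {'functions': {'check_leader': 'check_leader_v1',
                                        'check_health': 'check_health_v1'}}}

    api_url = 'http://localhost:8200/v2'  # hypothetical URL ending in an unknown version
    api_version = api_url[-1]             # '2', not a known version
    if api_version not in api_versions:
        api_url = api_url[:-1] + DEFAULT_API_VERSION  # 'http://localhost:8200/v1'
        api_version = DEFAULT_API_VERSION
    functions = api_versions[api_version]['functions']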
Example #15
    def create(logger, proxies, instance_config):
        keystone_server_url = instance_config.get("keystone_server_url")
        ssl_verify = is_affirmative(instance_config.get("ssl_verify", True))
        paginated_limit = instance_config.get('paginated_limit')
        request_timeout = instance_config.get('request_timeout')
        user = instance_config.get("user")
        openstack_config_file_path = instance_config.get(
            "openstack_config_file_path")
        openstack_cloud_name = instance_config.get("openstack_cloud_name")

        api = None

        # If an OpenStack configuration is specified, an OpenstackSDKApi will be created and
        # authentication will be done directly from the OpenStack configuration file
        if openstack_cloud_name is None:
            api = SimpleApi(logger,
                            keystone_server_url,
                            timeout=request_timeout,
                            ssl_verify=ssl_verify,
                            proxies=proxies,
                            limit=paginated_limit)
            api.connect(user)
        else:
            api = OpenstackSDKApi(logger)
            api.connect(openstack_config_file_path, openstack_cloud_name)

        return api
Example #16
    def _get_conn(self, instance):
        no_cache = is_affirmative(
            instance.get('disable_connection_cache', False))
        key = self._generate_instance_key(instance)

        if no_cache or key not in self.connections:
            try:
                # Only send useful parameters to the redis client constructor
                list_params = [
                    'host', 'port', 'db', 'password', 'socket_timeout',
                    'connection_pool', 'charset', 'errors', 'unix_socket_path',
                    'ssl', 'ssl_certfile', 'ssl_keyfile', 'ssl_ca_certs',
                    'ssl_cert_reqs'
                ]

                # Set a default timeout (in seconds) if no timeout is specified in the instance config
                instance['socket_timeout'] = instance.get('socket_timeout', 5)
                connection_params = dict(
                    (k, instance[k]) for k in list_params if k in instance)
                # If caching is disabled, we overwrite the dictionary value so the old connection
                # will be closed as soon as the corresponding Python object gets garbage collected
                self.connections[key] = redis.Redis(**connection_params)

            except TypeError:
                msg = "You need a redis library that supports authenticated connections. Try sudo easy_install redis."
                raise Exception(msg)

        return self.connections[key]
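
The parameter whitelist above can be exercised on its own; a minimal sketch with made-up values (only whitelisted keys ever reach redis.Redis):

    instance = {'host': 'localhost', 'port': 6379, 'password': 'hunter2',
                'unrelated_option': True}
    list_params = ['host', 'port', 'db', 'password', 'socket_timeout']
    instance.setdefault('socket_timeout', 5)
    connection_params = dict((k, instance[k]) for k in list_params if k in instance)
    # connection_params == {'host': 'localhost', 'port': 6379,
    #                       'password': 'hunter2', 'socket_timeout': 5}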
Example #17
    def __init__(self, instance):
        self.channel = instance.get('channel')
        self.queue_manager_name = instance.get('queue_manager', 'default')

        self.host = instance.get('host', 'localhost')
        self.port = instance.get('port', '1414')
        self.host_and_port = "{}({})".format(self.host, self.port)

        self.username = instance.get('username')
        self.password = instance.get('password')

        self.queues = instance.get('queues', [])
        self.queue_patterns = instance.get('queue_patterns', [])
        self.queue_regex = [
            re.compile(regex) for regex in instance.get('queue_regex', [])
        ]

        self.auto_discover_queues = is_affirmative(
            instance.get('auto_discover_queues', False))

        if int(self.auto_discover_queues) + int(bool(
                self.queue_patterns)) + int(bool(self.queue_regex)) > 1:
            log.warning(
                "Configurations auto_discover_queues, queue_patterns and queue_regex are not intended to be used "
                "together.")

        self.channels = instance.get('channels', [])

        self.channel_status_mapping = self.get_channel_status_mapping(
            instance.get('channel_status_mapping'))

        self.custom_tags = instance.get('tags', [])

        self.ssl = is_affirmative(instance.get('ssl_auth', False))
        self.ssl_cipher_spec = instance.get('ssl_cipher_spec',
                                            'TLS_RSA_WITH_AES_256_CBC_SHA')

        self.ssl_key_repository_location = instance.get(
            'ssl_key_repository_location',
            '/var/mqm/ssl-db/client/KeyringClient')

        self.mq_installation_dir = instance.get('mq_installation_dir',
                                                '/opt/mqm/')

        self._queue_tag_re = instance.get('queue_tag_re', {})
        self.queue_tag_re = self._compile_tag_re()
Example #18
def test_is_affirmative():
    assert config.is_affirmative(None) is False
    assert config.is_affirmative(0) is False
    assert config.is_affirmative("whatever, it could be 'off'") is False

    assert config.is_affirmative(1) is True
    assert config.is_affirmative('YES') is True
    assert config.is_affirmative('True') is True
    assert config.is_affirmative('On') is True
    assert config.is_affirmative('1') is True
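
The assertions above pin down the semantics of is_affirmative; a minimal implementation consistent with that test (a sketch, not necessarily the real one):

    def is_affirmative(value):
        # None and falsy numbers are negative
        if value is None:
            return False
        if isinstance(value, (bool, int)):
            return bool(value)
        # Strings: only well-known affirmative spellings count, case-insensitively
        return str(value).strip().lower() in ('yes', 'true', 'on', '1')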
Example #19
    def __init__(self, name, init_config, agentConfig, instances=None):
        AgentCheck.__init__(self, name, init_config, agentConfig, instances)

        # Cache connections
        self.connections = {}
        self.failed_connections = {}
        self.instances_metrics = {}
        self.instances_per_type_metrics = defaultdict(dict)
        self.existing_databases = None
        self.do_check = {}
        self.proc_type_mapping = {'gauge': self.gauge, 'rate': self.rate, 'histogram': self.histogram}
        self.adoprovider = self.default_adoprovider

        self.connector = init_config.get('connector', 'adodbapi')
        if self.connector.lower() not in self.valid_connectors:
            self.log.error("Invalid database connector %s, defaulting to adodbapi", self.connector)
            self.connector = 'adodbapi'

        self.adoprovider = init_config.get('adoprovider', self.default_adoprovider)
        if self.adoprovider.upper() not in self.valid_adoproviders:
            self.log.error(
                "Invalid ADODB provider string %s, defaulting to %s", self.adoprovider, self.default_adoprovider
            )
            self.adoprovider = self.default_adoprovider

        # Pre-process the list of metrics to collect
        self.custom_metrics = init_config.get('custom_metrics', [])
        for instance in instances:
            try:
                instance_key = self._conn_key(instance, self.DEFAULT_DB_KEY)
                self.do_check[instance_key] = True

                # check to see if the database exists before we try any connections to it
                with self.open_managed_db_connections(instance, None, db_name=self.DEFAULT_DATABASE):
                    db_exists, context = self._check_db_exists(instance)

                if db_exists:
                    if instance.get('stored_procedure') is None:
                        with self.open_managed_db_connections(instance, self.DEFAULT_DB_KEY):
                            self._make_metric_list_to_collect(instance, self.custom_metrics)
                else:
                    # How much do we care that the DB doesn't exist?
                    ignore = is_affirmative(instance.get("ignore_missing_database", False))
                    if ignore is not None and ignore:
                        # not much : we expect it. leave checks disabled
                        self.do_check[instance_key] = False
                        self.log.warning("Database %s does not exist. Disabling checks for this instance.", context)
                    else:
                        # yes we do. Keep trying
                        self.log.error("Database %s does not exist. Fix issue and restart agent", context)

            except SQLConnectionError:
                self.log.exception("Skipping SQL Server instance")
                continue
            except Exception as e:
                self.log.exception("INitialization exception %s", e)
                continue
Example #20
    def _get_auth_response_from_config(cls, logger, init_config, instance_config, proxy_config=None):
        keystone_server_url = init_config.get("keystone_server_url")
        if not keystone_server_url:
            raise IncompleteConfig()
        ssl_verify = is_affirmative(init_config.get("ssl_verify", False))

        identity = cls._get_user_identity(instance_config)
        keystone_api = KeystoneApi(logger, ssl_verify, proxy_config, keystone_server_url, None)
        resp = keystone_api.post_auth_token(identity)
        return resp.headers.get('X-Subject-Token')
Example #21
    def _cache_morlist_raw_async(self, instance, tags, regexes=None, include_only_marked=False):
        """
        Fills the queue in a separate thread
        """
        i_key = self._instance_key(instance)
        server_instance = self._get_server_instance(instance)
        use_guest_hostname = is_affirmative(instance.get("use_guest_hostname", False))
        all_objs = self._get_all_objs(server_instance, regexes, include_only_marked, tags,
                                      use_guest_hostname=use_guest_hostname)
        self.mor_objects_queue.fill(i_key, dict(all_objs))
Example #22
    def __init__(self, name, init_config, agentConfig, instances=None):
        # We do not support more than one instance of kube-state-metrics
        instance = instances[0]
        kubernetes_state_instance = self._create_kubernetes_state_prometheus_instance(instance)

        # First deprecation phase: we keep ksm labels by default
        # Next iteration: remove ksm labels by default
        # Last iteration: remove this option
        self.keep_ksm_labels = is_affirmative(kubernetes_state_instance.get('keep_ksm_labels', True))

        generic_instances = [kubernetes_state_instance]
        super(KubernetesState, self).__init__(name, init_config, agentConfig, instances=generic_instances)

        self.condition_to_status_positive = {'true': self.OK, 'false': self.CRITICAL, 'unknown': self.UNKNOWN}

        self.condition_to_status_negative = {'true': self.CRITICAL, 'false': self.OK, 'unknown': self.UNKNOWN}

        # Parameters for the count_objects_by_tags method
        self.object_count_params = {
            'kube_persistentvolume_status_phase': {
                'metric_name': 'persistentvolumes.by_phase',
                'allowed_labels': ['storageclass', 'phase'],
            },
            'kube_service_spec_type': {'metric_name': 'service.count', 'allowed_labels': ['namespace', 'type']},
        }

        self.METRIC_TRANSFORMERS = {
            'kube_pod_status_phase': self.kube_pod_status_phase,
            'kube_pod_container_status_waiting_reason': self.kube_pod_container_status_waiting_reason,
            'kube_pod_container_status_terminated_reason': self.kube_pod_container_status_terminated_reason,
            'kube_cronjob_next_schedule_time': self.kube_cronjob_next_schedule_time,
            'kube_job_complete': self.kube_job_complete,
            'kube_job_failed': self.kube_job_failed,
            'kube_job_status_failed': self.kube_job_status_failed,
            'kube_job_status_succeeded': self.kube_job_status_succeeded,
            'kube_node_status_condition': self.kube_node_status_condition,
            'kube_node_status_ready': self.kube_node_status_ready,
            'kube_node_status_out_of_disk': self.kube_node_status_out_of_disk,
            'kube_node_status_memory_pressure': self.kube_node_status_memory_pressure,
            'kube_node_status_disk_pressure': self.kube_node_status_disk_pressure,
            'kube_node_status_network_unavailable': self.kube_node_status_network_unavailable,
            'kube_node_spec_unschedulable': self.kube_node_spec_unschedulable,
            'kube_resourcequota': self.kube_resourcequota,
            'kube_limitrange': self.kube_limitrange,
            'kube_persistentvolume_status_phase': self.count_objects_by_tags,
            'kube_service_spec_type': self.count_objects_by_tags,
        }

        # Handling cron jobs succeeded/failed counts
        self.failed_cron_job_counts = defaultdict(KubernetesState.CronJobCount)
        self.succeeded_cron_job_counts = defaultdict(KubernetesState.CronJobCount)

        # Logic for Jobs
        self.job_succeeded_count = defaultdict(int)
        self.job_failed_count = defaultdict(int)
Example #23
    def _get_config(self, instance):
        server = instance.get('server')
        user = instance.get('user')
        password = instance.get('password')
        service = instance.get('service_name')
        jdbc_driver = instance.get('jdbc_driver_path')
        tags = instance.get('tags') or []
        custom_queries = instance.get('custom_queries', [])
        if is_affirmative(instance.get('use_global_custom_queries', True)):
            custom_queries.extend(self.init_config.get('global_custom_queries', []))

        return server, user, password, service, jdbc_driver, tags, custom_queries
Example #24
    def check(self, instance):
        status_url = instance.get('status_url')
        ping_url = instance.get('ping_url')
        use_fastcgi = is_affirmative(instance.get('use_fastcgi', False))
        ping_reply = instance.get('ping_reply')

        auth = None
        user = instance.get('user')
        password = instance.get('password')

        tags = instance.get('tags', [])
        http_host = instance.get('http_host')

        timeout = instance.get('timeout', DEFAULT_TIMEOUT)

        disable_ssl_validation = is_affirmative(
            instance.get('disable_ssl_validation', False))

        if user and password:
            auth = (user, password)

        if status_url is None and ping_url is None:
            raise BadConfigError(
                "No status_url or ping_url specified for this instance")

        pool = None
        if status_url is not None:
            try:
                pool = self._process_status(status_url, auth, tags, http_host,
                                            timeout, disable_ssl_validation,
                                            use_fastcgi)
            except Exception as e:
                self.log.error("Error running php_fpm check: {}".format(e))

        if ping_url is not None:
            self._process_ping(ping_url, ping_reply, auth, tags, pool,
                               http_host, timeout, disable_ssl_validation,
                               use_fastcgi)
Example #25
    def _get_request_url(self, instance, url):
        '''
        Get the request address, build with proxy if necessary
        '''
        parsed = urlparse(url)

        _url = url
        if not (parsed.netloc and parsed.scheme) and \
                is_affirmative(instance.get('spark_proxy_enabled', False)):
            master_address = self._get_master_address(instance)
            _url = urljoin(master_address, parsed.path)

        self.log.debug('Request URL returned: %s', _url)
        return _url
Example #26
    def _cache_morlist_raw(self, instance):
        """
        Fill the Mor objects queue that will be asynchronously processed later.
        Resolve the vCenter `rootFolder` and initiate hosts and virtual machines
        discovery.
        """
        i_key = self._instance_key(instance)
        self.log.debug("Caching the morlist for vcenter instance {}".format(i_key))

        # If the queue is not completely empty, don't do anything
        for resource_type in RESOURCE_TYPE_METRICS:
            if self.mor_objects_queue.contains(i_key) and self.mor_objects_queue.size(i_key, resource_type):
                last = self.cache_config.get_last(CacheConfig.Morlist, i_key)
                self.log.debug(
                    "Skipping morlist collection: the objects queue for the "
                    "resource type '{}' is still being processed "
                    "(latest refresh was {}s ago)".format(
                        ensure_unicode(resource_type), time.time() - last
                    )
                )
                return

        tags = ["vcenter_server:{}".format(ensure_unicode(instance.get('name')))]
        regexes = {
            'host_include': instance.get('host_include_only_regex'),
            'vm_include': instance.get('vm_include_only_regex')
        }
        include_only_marked = is_affirmative(instance.get('include_only_marked', False))

        # Discover hosts and virtual machines
        server_instance = self._get_server_instance(instance)
        use_guest_hostname = is_affirmative(instance.get("use_guest_hostname", False))
        all_objs = self._get_all_objs(server_instance, regexes, include_only_marked, tags,
                                      use_guest_hostname=use_guest_hostname)

        self.mor_objects_queue.fill(i_key, dict(all_objs))
        self.cache_config.set_last(CacheConfig.Morlist, i_key, time.time())
Example #27
    def from_config(cls, init_config, instance_config, proxy_config=None):
        keystone_server_url = init_config.get("keystone_server_url")
        if not keystone_server_url:
            raise IncompleteConfig()

        ssl_verify = is_affirmative(init_config.get("ssl_verify", True))
        auth_token = cls._get_auth_response_from_config(
            init_config, instance_config, proxy_config)

        # list all projects
        projects = KeystoneApi.get_auth_projects(auth_token,
                                                 keystone_server_url,
                                                 ssl_verify, proxy_config)

        # for each project, we create an OpenStackProject object that we add to the `project_scope_map` dict
        project_scope_map = {}
        for project in projects:
            identity = {"methods": ['token'], "token": {"id": auth_token}}
            scope = {'project': {'id': project.get('id')}}
            token_resp = KeystoneApi.post_auth_token(keystone_server_url,
                                                     identity,
                                                     ssl_verify,
                                                     scope=scope,
                                                     proxy=proxy_config)

            project_auth_token = token_resp.headers.get('X-Subject-Token')
            nova_endpoint = cls._get_nova_endpoint(token_resp.json())
            neutron_endpoint = cls._get_neutron_endpoint(token_resp.json())
            project_auth_scope = {
                'project': {
                    'name': project.get('name'),
                    'id': project.get('id'),
                    'domain': {} if project.get('domain_id') is None else {
                        'id': project.get('domain_id')
                    },
                }
            }

            project_name = project.get('name')
            project_id = project.get('id')
            if project_name is None or project_id is None:
                # Skip projects with an incomplete identity rather than
                # aborting the whole loop
                continue

            project_scope = OpenStackProject(project_auth_token,
                                             project_auth_scope, nova_endpoint,
                                             neutron_endpoint)
            project_key = (project_name, project_id)
            project_scope_map[project_key] = project_scope

        return cls(auth_token, project_scope_map)
Example #28
    def create(logger, proxies, instance_config):
        keystone_server_url = instance_config.get("keystone_server_url")
        ssl_verify = is_affirmative(instance_config.get("ssl_verify", True))
        paginated_limit = instance_config.get('paginated_limit')
        request_timeout = instance_config.get('request_timeout')
        user = instance_config.get("user")

        api = SimpleApi(logger,
                        keystone_server_url,
                        timeout=request_timeout,
                        ssl_verify=ssl_verify,
                        proxies=proxies,
                        limit=paginated_limit)
        api.connect(user)
        return api
Example #29
    def _process_mor_objects_queue(self, instance):
        """
        Pops `batch_morlist_size` items from the mor objects queue and run asynchronously
        the _process_mor_objects_queue_async method to fill the Mor cache.
        """
        i_key = self._instance_key(instance)
        self.mor_cache.init_instance(i_key)

        if not self.mor_objects_queue.contains(i_key):
            self.log.debug(
                "Objects queue is not initialized yet for instance {}, skipping processing"
                .format(i_key))
            return

        for resource_type in RESOURCE_TYPE_METRICS:
            # Batch size can prevent querying large payloads at once if the environment is too large
            # If batch size is set to 0, process everything at once
            batch_size = self.batch_morlist_size or self.mor_objects_queue.size(
                i_key, resource_type)
            while self.mor_objects_queue.size(i_key, resource_type):
                mors = []
                for _ in xrange(batch_size):
                    mor = self.mor_objects_queue.pop(i_key, resource_type)
                    if mor is None:
                        self.log.debug(
                            "No more objects of type '{}' left in the queue".
                            format(resource_type))
                        break

                    mor_name = str(mor['mor'])
                    mor['interval'] = (REAL_TIME_INTERVAL if mor['mor_type']
                                       in REALTIME_RESOURCES else None)
                    # Always update the cache to account for Mors that might have changed parent
                    # in the meantime (e.g. a migrated VM).
                    self.mor_cache.set_mor(i_key, mor_name, mor)

                    # Only do this for non real-time resources i.e. datacenter, datastore and cluster
                    # For hosts and VMs, we can rely on a precomputed list of metrics
                    realtime_only = is_affirmative(
                        instance.get("collect_realtime_only", True))
                    if mor["mor_type"] not in REALTIME_RESOURCES and not realtime_only:
                        mors.append(mor)

                # We will actually schedule jobs for non realtime resources only.
                if mors:
                    self.pool.apply_async(
                        self._process_mor_objects_queue_async,
                        args=(instance, mors))
Example #30
    def __init__(self, name, init_config, agentConfig, instances=None):
        super(OpenStackControllerCheck, self).__init__(name, init_config, agentConfig, instances)
        self.keystone_server_url = init_config.get("keystone_server_url")

        if not self.keystone_server_url:
            raise IncompleteConfig()
        self.proxy_config = self.get_instance_proxy(init_config, self.keystone_server_url)

        self.ssl_verify = is_affirmative(init_config.get("ssl_verify", True))

        self.paginated_server_limit = init_config.get('paginated_server_limit') or DEFAULT_PAGINATED_SERVER_LIMIT
        self.request_timeout = init_config.get('request_timeout') or DEFAULT_API_REQUEST_TIMEOUT

        exclude_network_id_patterns = set(init_config.get('exclude_network_ids', []))
        self.exclude_network_id_rules = [re.compile(ex) for ex in exclude_network_id_patterns]
        exclude_server_id_patterns = set(init_config.get('exclude_server_ids', []))
        self.exclude_server_id_rules = [re.compile(ex) for ex in exclude_server_id_patterns]
        include_project_name_patterns = set(init_config.get('whitelist_project_names', []))
        self.include_project_name_rules = [re.compile(ex) for ex in include_project_name_patterns]
        exclude_project_name_patterns = set(init_config.get('blacklist_project_names', []))
        self.exclude_project_name_rules = [re.compile(ex) for ex in exclude_project_name_patterns]

        self._keystone_api = None
        self._compute_api = None
        self._neutron_api = None

        self._backoff = BackOffRetry()

        # Mapping of check instances to associated OpenStackScope
        self.instance_scopes_cache = {}
        # Current instance and project authentication scopes
        self.instance_scope = None

        # Cache some things between runs for values that change rarely
        self._aggregate_list = None

        # Mapping of Nova-managed servers to tags
        self.external_host_tags = {}

        # ISO8601 date time: used to filter the call to get the list of nova servers
        self.changes_since_time = {}

        # Ex: server_details_by_id = {
        #   UUID: {UUID: <value>, etc}
        #   1: {id: 1, name: hostA},
        #   2: {id: 2, name: hostB}
        # }
        self.server_details_by_id = {}