Exemplo n.º 1
0
class ServiceMonitoringClient:
    """Client for Cloud Service Monitoring.

    Args:
        project_id (str): Cloud host project id.
    """
    def __init__(self, project_id):
        self.client = ServiceMonitoringServiceClient()
        self.project_id = project_id
        self.project = f'projects/{project_id}'
        self.workspace = f'workspaces/{project_id}'

    def create_service(self, service_id, service_config):
        """Create Service object in Cloud Service Monitoring API.

        Args:
            service_id (str): Service id.
            service_config (dict): Service config.
            service_config (str): Service config path.

        Returns:
            dict: Cloud Service Monitoring API response.
        """
        return self.client.create_service(
            parent=self.project,
            service=types.Service(service_config),
            service_id=service_id)

    def get_service(self, service_id):
        """Get Service object in Cloud Service Monitoring API.

        Args:
            service_id (str): Service id.

        Returns:
            dict: Cloud Service Monitoring API response.
        """
        service_path = self.build_service_path(service_id)
        return self.client.get_service(name=service_path)

    def delete_service(self, service_id):
        """Delete Service object in Cloud Service Monitoring API.

        Args:
            service_id (str): Service id.

        Returns:
            dict: Cloud Service Monitoring API response.
        """
        service_path = self.build_service_path(service_id)
        return self.client.delete_service(name=service_path)

    def update_service(self, service_config):
        """Update Service object in Cloud Service Monitoring API.

        Args:
            service_config (dict): Service config.

        Returns:
            dict: Cloud Service Monitoring API response.
        """
        return self.client.update_service(
            service=types.Service(service_config))

    def list_services(self):
        """List Cloud Service Monitoring services in project.

        Returns:
            dict: Cloud Service Monitoring API response.
        """
        return self.client.list_services(parent=self.workspace)

    def create_slo(self, service_id, slo_id, slo_config):
        """Create SLO object in Cloud Service Monitoring API.

        Args:
            service_id (str): Cloud Service Monitoring Service id.
            slo_id (str): Cloud Service Monitoring SLO id.
            slo_config (dict): SLO config.
            slo_config (str): SLO config path.

        Returns:
            dict: Service Management API response.
        """
        slo_config = ServiceMonitoringClient._maybe_load(slo_config)
        parent = self.build_service_path(service_id)
        return self.client.create_service_level_objective(
            parent=parent,
            service_level_objective=types.ServiceLevelObjective(slo_config),
            service_level_objective_id=slo_id)

    def get_slo(self, service_id, slo_id):
        """Get SLO object from Cloud Service Monitoring API.

        Args:
            service_id (str): Service identifier.
            slo_id (str): Service Level Objectif identifier.

        Returns:
            dict: API response.
        """
        parent = self.build_slo_path(service_id, slo_id)
        return self.client.get_service_level_objective(name=parent)

    def update_slo(self, service_id, slo_id, slo_config):
        """Update an existing SLO.

        Args:
            service_id (str): Cloud Service Monitoring Service id.
            slo_id (str): Cloud Service Monitoring SLO id.
            slo_config (str | dict): SLO config path or dict.

        Returns:
            dict: API response.
        """
        slo_config = ServiceMonitoringClient._maybe_load(slo_config)
        slo_id = self.build_slo_path(service_id, slo_id)
        slo_config['name'] = slo_id
        return self.client.update_service_level_objective(
            service_level_objectives=types.ServiceLevelObjective(slo_config))

    def list_slos(self, service_id):
        """List all SLOs from Cloud Service Monitoring API.

        Args:
            service_path (str): Service path in the form
                'projects/{project_id}/services/{service_id}'.
            slo_config (dict): SLO configuration.

        Returns:
            dict: API response.
        """
        service_path = self.build_service_path(service_id)
        return self.client.list_service_level_objectives(parent=service_path)

    def delete_slo(self, service_id, slo_id):
        """Delete SLO from Cloud Monitoring API.

        Args:
            service_id (str): Cloud Service Monitoring Service id.
            slo_id (str): Cloud Service Monitoring SLO id.

        Returns:
            dict: API response.
        """
        slo_path = self.build_slo_path(service_id, slo_id)
        return self.client.delete_service_level_objective(name=slo_path)

    def build_service_path(self, service_id):
        """Build Service object path.

        Args:
            service_id (str): Cloud Service Monitoring Service id.

        Returns:
            str: Service full path.
        """
        return f'projects/{self.project_id}/services/{service_id}'

    def build_slo_path(self, service_id, slo_id):
        """Build SLO object path.

        Args:
            service_id (str): Cloud Service Monitoring Service id.
            slo_id (str): Cloud Service Monitoring SLO id.

        Returns:
            str: SLO full path.
        """
        service_path = self.build_service_path(service_id)
        return f'{service_path}/serviceLevelObjectives/{slo_id}'

    @staticmethod
    def _maybe_load(config):
        """Maybe load something from file.

        Args:
            config (dict): Config dict.
            config (str): Config filepath.

        Returns:
            dict: JSON config (loaded from file or from string)
        """
        if os.path.exists(config):
            with open(config) as cfg:
                config = json.load(cfg)
        else:
            config = json.loads(config)
        return config
Exemplo n.º 2
0
class StackdriverServiceMonitoringBackend:
    """Stackdriver Service Monitoring backend class.

    Args:
        project_id (str): Stackdriver host project id.
        client (google.cloud.monitoring_v3.ServiceMonitoringServiceClient):
            Existing Service Monitoring API client. Initialize a new client if
            omitted.
    """
    def __init__(self, project_id, client=None):
        self.project_id = project_id
        self.client = client
        if client is None:
            self.client = ServiceMonitoringServiceClient()
        self.parent = self.client.project_path(project_id)
        self.workspace_path = f'workspaces/{project_id}'
        self.project_path = f'projects/{project_id}'

    def good_bad_ratio(self, timestamp, window, slo_config):
        """Good bad ratio method.

        Args:
            timestamp (int): UNIX timestamp.
            window (int): Window in seconds.
            slo_config (dict): SLO configuration.

        Returns:
            dict: SLO config.
        """
        return self.retrieve_slo(timestamp, window, slo_config)

    def distribution_cut(self, timestamp, window, slo_config):
        """Distribution cut method.

        Args:
            timestamp (int): UNIX timestamp.
            window (int): Window in seconds.
            slo_config (dict): SLO configuration.

        Returns:
            dict: SLO config.
        """
        return self.retrieve_slo(timestamp, window, slo_config)

    def basic(self, timestamp, window, slo_config):
        """Basic method (automatic SLOs for GAE / GKE (Istio) and Cloud
        Endpoints).

        Args:
            timestamp (int): UNIX timestamp.
            window (int): Window in seconds.
            slo_config (dict): SLO configuration.

        Returns:
            dict: SLO config.
        """
        return self.retrieve_slo(timestamp, window, slo_config)

    def window(self, timestamp, window, slo_config):
        """Window-based SLI method.

        Args:
            timestamp (int): UNIX timestamp.
            window (int): Window in seconds.
            slo_config (dict): SLO configuration.

        Returns:
            dict: SLO config.
        """
        return self.retrieve_slo(timestamp, window, slo_config)

    # pylint: disable=unused-argument
    def delete(self, timestamp, window, slo_config):
        """Delete method.

        Args:
            timestamp (int): UNIX timestamp.
            window (int): Window in seconds.
            slo_config (dict): SLO configuration.

        Returns:
            dict: SLO config.
        """
        return self.delete_slo(window, slo_config)

    def retrieve_slo(self, timestamp, window, slo_config):
        """Get SLI value from Stackdriver Monitoring API.

        Args:
            timestamp (int): UNIX timestamp.
            window (int): Window in seconds.
            slo_config (dict): SLO configuration.

        Returns:
            dict: SLO config.
        """
        # Get or create service
        service = self.get_service(slo_config)
        if service is None:
            service = self.create_service(slo_config)
        LOGGER.debug(service)

        # Get or create SLO
        slo = self.get_slo(window, slo_config)
        if not slo:
            slo = self.create_slo(window, slo_config)
        LOGGER.debug(service)

        # Now that we have our SLO, retrieve the TimeSeries from Stackdriver
        # Monitoring API for that particular SLO id.
        metric_filter = SSM.build_slo_id(window, slo_config, full=True)
        filter = f"select_slo_counts(\"{metric_filter}\")"

        # Query SLO timeseries
        stackdriver = StackdriverBackend(self.project_id)
        timeseries = stackdriver.query(timestamp,
                                       window,
                                       filter,
                                       aligner='ALIGN_SUM',
                                       reducer='REDUCE_SUM',
                                       group_by=['metric.labels.event_type'])
        timeseries = list(timeseries)
        good_event_count, bad_event_count = SSM.count(timeseries)
        return (good_event_count, bad_event_count)

    @staticmethod
    def count(timeseries):
        """Extract good_count, bad_count tuple from Stackdriver Monitoring API
        response.

        Args:
            timeseries (list): List of timeseries objects.

        Returns:
            tuple: A tuple (good_event_count, bad_event_count).
        """
        good_event_count, bad_event_count = NO_DATA, NO_DATA
        for timeserie in timeseries:
            event_type = timeserie.metric.labels['event_type']
            value = timeserie.points[0].value.double_value
            if event_type == 'bad':
                bad_event_count = value
            elif event_type == 'good':
                good_event_count = value
        return good_event_count, bad_event_count

    def create_service(self, slo_config):
        """Create Service object in Stackdriver Service Monitoring API.

        Args:
            slo_config (dict): SLO configuration.

        Returns:
            dict: Stackdriver Service Monitoring API response.
        """
        LOGGER.debug("Creating service ...")
        service_json = SSM.build_service(slo_config)
        service_id = SSM.build_service_id(slo_config)
        service = self.client.create_service(self.project_path,
                                             service_json,
                                             service_id=service_id)
        LOGGER.info(
            f'Service "{service_id}" created successfully in Stackdriver '
            f'Service Monitoring API.')
        return SSM.to_json(service)

    def get_service(self, slo_config):
        """Get Service object from Stackdriver Service Monitoring API.

        Args:
            slo_config (dict): SLO configuration.

        Returns:
            dict: Service config.
        """

        # Look for API services in workspace matching our config.
        service_id = SSM.build_service_id(slo_config)
        services = list(self.client.list_services(self.workspace_path))
        matches = [
            service for service in services
            if service.name.split("/")[-1] == service_id
        ]

        # If no match is found for our service name in the API, raise an
        # exception if the service should have been auto-added (method 'basic'),
        # else output a warning message.
        if not matches:
            msg = (f'Service "{service_id}" does not exist in '
                   f'workspace "{self.project_id}"')
            method = slo_config['backend']['method']
            if method == 'basic':
                sids = [service.name.split("/")[-1] for service in services]
                LOGGER.debug(
                    f'List of services in workspace {self.project_id}: {sids}')
                LOGGER.error(msg)
                raise Exception(msg)
            LOGGER.error(msg)
            return None

        # Match found in API, return it.
        service = matches[0]
        LOGGER.debug(f'Found matching service "{service.name}"')
        return SSM.to_json(service)

    @staticmethod
    def build_service(slo_config):
        """Build service JSON in Stackdriver Monitoring API from SLO
        configuration.

        Args:
            slo_config (dict): SLO configuration.

        Returns:
            dict: Service JSON in Stackdriver Monitoring API.
        """
        service_id = SSM.build_service_id(slo_config)
        display_name = slo_config.get('service_display_name', service_id)
        service = {'display_name': display_name, 'custom': {}}
        return service

    @staticmethod
    def build_service_id(slo_config, dest_project_id=None, full=False):
        """Build service id from SLO configuration.

        Args:
            slo_config (dict): SLO configuration.
            dest_project_id (str, optional): Project id for service if different
                than the workspace project id.
            full (bool): If True, return full service resource id including
                project path.

        Returns:
            str: Service id.
        """
        service_name = slo_config['service_name']
        feature_name = slo_config['feature_name']
        backend = slo_config['backend']
        project_id = backend['project_id']
        measurement = backend['measurement']
        app_engine = measurement.get('app_engine')
        cluster_istio = measurement.get('cluster_istio')
        mesh_istio = measurement.get('mesh_istio')
        cloud_endpoints = measurement.get('cloud_endpoints')

        # Use auto-generated ids for 'custom' SLOs, use system-generated ids
        # for all other types of SLOs.
        if app_engine:
            service_id = SID_GAE.format_map(app_engine)
            dest_project_id = app_engine['project_id']
        elif cluster_istio:
            service_id = SID_CLUSTER_ISTIO.format_map(cluster_istio)
            dest_project_id = cluster_istio['project_id']
        elif mesh_istio:
            service_id = SID_MESH_ISTIO.format_map(mesh_istio)
        elif cloud_endpoints:
            service_id = SID_CLOUD_ENDPOINT.format_map(cloud_endpoints)
            dest_project_id = cluster_istio['project_id']
        else:
            service_id = f'{service_name}-{feature_name}'

        if full:
            if dest_project_id:
                return f'projects/{dest_project_id}/services/{service_id}'
            return f'projects/{project_id}/services/{service_id}'

        return service_id

    def create_slo(self, window, slo_config):
        """Create SLO object in Stackdriver Service Monitoring API.

        Args:
            window (int): Window (in seconds).
            slo_config (dict): SLO config.

        Returns:
            dict: Service Management API response.
        """
        slo_json = SSM.build_slo(window, slo_config)
        slo_id = SSM.build_slo_id(window, slo_config)
        parent = SSM.build_service_id(slo_config, full=True)
        slo = self.client.create_service_level_objective(
            parent, slo_json, service_level_objective_id=slo_id)
        return SSM.to_json(slo)

    @staticmethod
    def build_slo(window, slo_config):  # pylint: disable=R0912,R0915
        """Get SLO JSON representation in Service Monitoring API from SLO
        configuration.

        Args:
            window (int): Window (in seconds).
            slo_config (dict): SLO Configuration.

        Returns:
            dict: SLO JSON configuration.
        """
        measurement = slo_config['backend'].get('measurement', {})
        method = slo_config['backend']['method']
        description = slo_config['slo_description']
        target = slo_config['slo_target']
        minutes, _ = divmod(window, 60)
        hours, _ = divmod(minutes, 60)
        display_name = f'{description} ({hours}h)'
        slo = {
            'display_name': display_name,
            'goal': target,
            'rolling_period': {
                'seconds': window
            }
        }
        filter_valid = measurement.get('filter_valid', "")
        if method == 'basic':
            methods = measurement.get('method', [])
            locations = measurement.get('location', [])
            versions = measurement.get('version', [])
            threshold = measurement.get('latency', {}).get('threshold')
            slo['service_level_indicator'] = {'basic_sli': {}}
            basic_sli = slo['service_level_indicator']['basic_sli']
            if methods:
                basic_sli['method'] = methods
            if locations:
                basic_sli['location'] = locations
            if versions:
                basic_sli['version'] = versions
            if threshold:
                basic_sli['latency'] = {
                    'threshold': {
                        'seconds': 0,
                        'nanos': int(threshold) * 10**6
                    }
                }
            else:
                basic_sli['availability'] = {}

        elif method == 'good_bad_ratio':
            filter_good = measurement.get('filter_good', "")
            filter_bad = measurement.get('filter_bad', "")
            slo['service_level_indicator'] = {
                'request_based': {
                    'good_total_ratio': {}
                }
            }
            sli = slo['service_level_indicator']
            ratio = sli['request_based']['good_total_ratio']
            if filter_good:
                ratio['good_service_filter'] = filter_good
            if filter_bad:
                ratio['bad_service_filter'] = filter_bad
            if filter_valid:
                ratio['total_service_filter'] = filter_valid

        elif method == 'distribution_cut':
            range_min = measurement.get('range_min', 0)
            range_max = measurement['range_max']
            slo['service_level_indicator'] = {
                'request_based': {
                    'distribution_cut': {
                        'distribution_filter': filter_valid,
                        'range': {
                            'max': float(range_max)
                        }
                    }
                }
            }
            sli = slo['service_level_indicator']['request_based']
            if range_min != 0:
                sli['distribution_cut']['range']['min'] = float(range_min)

        elif method == 'windows':
            filter = measurement.get('filter')
            # threshold = conf.get('threshold')
            # mean_in_range = conf.get('filter')
            # sum_in_range = conf.get('filter')
            slo['service_level_indicator'] = {
                'windows_based': {
                    'window_period': window,
                    'good_bad_metric_filter': filter,
                    # 'good_total_ratio_threshold': {
                    #   object (PerformanceThreshold)
                    # },
                    # 'metricMeanInRange': {
                    #   object (MetricRange)
                    # },
                    # 'metricSumInRange': {
                    #   object (MetricRange)
                    # }
                }
            }
        else:
            raise Exception(f'Method "{method}" is not supported.')
        return slo

    def get_slo(self, window, slo_config):
        """Get SLO object from Stackriver Service Monitoring API.

        Args:
            service_id (str): Service identifier.
            window (int): Window in seconds.
            slo_config (dict): SLO config.

        Returns:
            dict: API response.
        """
        service_path = SSM.build_service_id(slo_config, full=True)
        LOGGER.debug(f'Getting SLO for for "{service_path}" ...')
        slos = self.list_slos(service_path)
        slo_local_id = SSM.build_slo_id(window, slo_config)
        slo_json = SSM.build_slo(window, slo_config)
        slo_json = SSM.convert_slo_to_ssm_format(slo_json)

        # Loop through API response to find an existing SLO that corresponds to
        # our configuration.
        for slo in slos:
            slo_remote_id = slo['name'].split("/")[-1]
            equal = slo_remote_id == slo_local_id
            if equal:
                LOGGER.debug(f'Found existing SLO "{slo_remote_id}".')
                LOGGER.debug(f'SLO object: {slo}')
                strict_equal = SSM.compare_slo(slo_json, slo)
                if strict_equal:
                    return slo
                return self.update_slo(window, slo_config)
        LOGGER.warning('No SLO found matching configuration.')
        LOGGER.debug(f'SLOs from Stackdriver Monitoring API: {slos}')
        LOGGER.debug(f'SLO config converted: {slo_json}')
        return None

    def update_slo(self, window, slo_config):
        """Update an existing SLO.

        Args:
            window (int): Window (in seconds)
            slo_config (dict): SLO configuration.

        Returns:
            dict: API response.
        """
        slo_json = SSM.build_slo(window, slo_config)
        slo_id = SSM.build_slo_id(window, slo_config, full=True)
        LOGGER.warning(f"Updating SLO {slo_id} ...")
        slo_json['name'] = slo_id
        return SSM.to_json(
            self.client.update_service_level_objective(slo_json))

    def list_slos(self, service_path):
        """List all SLOs from Stackdriver Service Monitoring API.

        Args:
            service_path (str): Service path in the form
                'projects/{project_id}/services/{service_id}'.
            slo_config (dict): SLO configuration.

        Returns:
            dict: API response.
        """
        slos = self.client.list_service_level_objectives(service_path)
        slos = list(slos)
        LOGGER.debug(f"{len(slos)} SLOs found in Service Monitoring API.")
        # LOGGER.debug(slos)
        return [SSM.to_json(slo) for slo in slos]

    def delete_slo(self, window, slo_config):
        """Delete SLO from Stackdriver Monitoring API.

        Args:
            window (int): Window (in seconds).
            slo_config: SLO configuration.

        Returns:
            dict: API response.
        """
        slo_path = SSM.build_slo_id(window, slo_config, full=True)
        LOGGER.info(f'Deleting SLO "{slo_path}"')
        try:
            return self.client.delete_service_level_objective(slo_path)
        except google.api_core.exceptions.NotFound:
            LOGGER.warning(
                f'SLO "{slo_path}" does not exist in Service Monitoring API. '
                f'Skipping.')
            return None

    @staticmethod
    def build_slo_id(window, slo_config, full=False):
        """Build SLO id from SLO configuration.

        Args:
            slo_config (dict): SLO configuration.
            full (bool): If True, return full resource id including project.

        Returns:
            str: SLO id.
        """
        if 'slo_id' in slo_config:
            slo_id_part = slo_config['slo_id']
            slo_id = f'{slo_id_part}-{window}'
        else:
            slo_name = slo_config['slo_name']
            slo_id = f'{slo_name}-{window}'
        if full:
            service_path = SSM.build_service_id(slo_config, full=True)
            return f'{service_path}/serviceLevelObjectives/{slo_id}'
        return slo_id

    @staticmethod
    def compare_slo(slo1, slo2):
        """Compares 2 SLO configurations to see if they correspond to the same
        SLO.

        An SLO is deemed the same if the whole configuration is similar, except
        for the `goal` field that should be adjustable.

        Args:
            slo1 (dict): Service Monitoring API SLO configuration to compare.
            slo2 (dict): Service Monitoring API SLO configuration to compare.

        Returns:
            bool: True if the SLOs match, False otherwise.
        """
        exclude_keys = ["name"]
        slo1_copy = {k: v for k, v in slo1.items() if k not in exclude_keys}
        slo2_copy = {k: v for k, v in slo2.items() if k not in exclude_keys}
        local_json = json.dumps(slo1_copy, sort_keys=True)
        remote_json = json.dumps(slo2_copy, sort_keys=True)
        if os.environ.get('DEBUG') == '2':
            LOGGER.info("----------")
            LOGGER.info(local_json)
            LOGGER.info("----------")
            LOGGER.info(remote_json)
            LOGGER.info("----------")
            LOGGER.info(SSM.string_diff(local_json, remote_json))
        return local_json == remote_json

    @staticmethod
    def string_diff(string1, string2):
        """Diff 2 strings. Used to print comparison of JSONs for debugging.

        Args:
            string1 (str): String 1.
            string2 (str): String 2.

        Returns:
            list: List of messages pointing out differences.
        """
        lines = []
        for idx, string in enumerate(difflib.ndiff(string1, string2)):
            if string[0] == ' ':
                continue
            if string[0] == '-':
                info = u'Delete "{}" from position {}'.format(string[-1], idx)
                lines.append(info)
            elif string[0] == '+':
                info = u'Add "{}" to position {}'.format(string[-1], idx)
                lines.append(info)
        return lines

    @staticmethod
    def convert_slo_to_ssm_format(slo):
        """Convert SLO JSON to Service Monitoring API format.
        Address edge cases, like `duration` object computation.

        Args:
            slo (dict): SLO JSON object to be converted to Stackdriver Service
                Monitoring API format.

        Returns:
            dict: SLO configuration in Service Monitoring API format.
        """
        # Our local JSON is in snake case, convert it to Caml case.
        data = dict_snake_to_caml(slo)

        # The `rollingPeriod` field is in Duration format, convert it.
        try:
            period = data['rollingPeriod']
            data['rollingPeriod'] = SSM.convert_duration_to_string(period)
        except KeyError:
            pass

        # The `latency` field is in Duration format, convert it.
        try:
            latency = data['serviceLevelIndicator']['basicSli']['latency']
            threshold = latency['threshold']
            latency['threshold'] = SSM.convert_duration_to_string(threshold)
        except KeyError:
            pass

        return data

    @staticmethod
    def convert_duration_to_string(duration):
        """Convert a duration object to a duration string (in seconds).

        Args:
            duration (dict): Duration dictionary.

        Returns:
            str: Duration string.
        """
        duration_seconds = 0.000
        if 'seconds' in duration:
            duration_seconds += duration['seconds']
        if 'nanos' in duration:
            duration_seconds += duration['nanos'] * 10**(-9)
        if duration_seconds.is_integer():
            duration_str = int(duration_seconds)
        else:
            duration_str = "{:0.3f}".format(duration_seconds)
        return str(duration_str) + 's'

    @staticmethod
    def to_json(response):
        """Convert a Stackdriver Service Monitoring API response to JSON
        format.

        Args:
            response (obj): Response object.

        Returns:
            dict: Response object serialized as JSON.
        """
        return json.loads(MessageToJson(response))