Example #1
0
    def setup(self):
        """Build a TrafficSentinel client against a fake host and remember
        the real urllib2.urlopen so teardown can restore it after mocking."""
        fake_host = "fake"
        fake_user = "******"
        fake_pass = "******"
        self.mock_traffic_sentinel = True

        self.traffic_sentinel = TrafficSentinel(fake_host, username=fake_user, password=fake_pass)
        # keep a handle on the genuine urlopen for restoration in teardown
        self.original_urlopen = urllib2.urlopen
Example #2
0
    def __init__(self, parameters=None, process_definition_id=None,
            schedule_process_callback=None, terminate_process_callback=None,
            process_state_callback=None,
            process_configuration=None, aggregator_config=None, *args, **kwargs):
        """Set up the Policy

        @param parameters: The parameters used by this policy to determine the
        distribution and number of VMs. This policy expects a dictionary with
        TODO

        @param process_definition_id: The process definition id to send to the
        PD on launch

        @param schedule_process_callback: A callback to schedule a process to a
        PD. Must have signature: schedule(pd_name, process_definition_id), and
        return a upid as a string

        @param terminate_process_callback: A callback to terminate a process on
        a PD. Must have signature: terminate(upid)

        @param process_state_callback: A callback to get a process state from
        a PD. Must have signature: process_state(upid)

        @param aggregator_config: configuration dict of aggregator. For traffic
        sentinel, this should look like:
          config = {
              'type': 'trafficsentinel',
              'host': 'host.name.tld',
              'port': 1235,
              'username': '******',
              'password': '******'
          }

        @raise Exception: when aggregator_config is missing or its 'type' is
        not recognized
        """

        # Fall back to no-op callbacks so the policy can be exercised standalone
        self.schedule_process = schedule_process_callback or dummy_schedule_process_callback
        self.terminate_process = terminate_process_callback or dummy_terminate_process_callback
        self.process_state = process_state_callback or dummy_process_state_callback

        self._parameters = None
        if parameters:
            # Goes through the `parameters` property setter, which validates
            self.parameters = parameters
        else:
            self._schedule_kwargs = {}

        self.process_definition_id = process_definition_id
        self.previous_all_procs = {}
        self._status = HAState.PENDING
        self.minimum_n = 1
        # datetime.min means "never scaled yet", so the first scaling action
        # is never blocked by the cooldown period
        self.last_scale_action = datetime.datetime.min

        if aggregator_config is None:
            raise Exception("Must provide an aggregator config")

        aggregator_type = aggregator_config.get('type', '').lower()
        if aggregator_type == 'trafficsentinel':
            host = aggregator_config.get('host')
            username = aggregator_config.get('username')
            password = aggregator_config.get('password')
            port = aggregator_config.get('port', 443)
            protocol = aggregator_config.get('protocol', 'https')
            self._sensor_aggregator = TrafficSentinel(host, username, password, port=port, protocol=protocol)
            self.app_metrics = self._sensor_aggregator.app_metrics
            # BUG FIX: previously assigned app_metrics here (copy-paste error);
            # assumes TrafficSentinel exposes host_metrics -- confirm
            self.host_metrics = self._sensor_aggregator.host_metrics
        else:
            raise Exception("Don't know what to do with %s aggregator type" % aggregator_type)

        if kwargs.get('name'):
            self.logprefix = "HA Agent (%s): " % kwargs['name']
        else:
            self.logprefix = ""
Example #3
0
class TestTrafficSentinel(object):
    """Tests for the TrafficSentinel client.

    Runs against a mocked urllib2 by default; environment variables can point
    the assertions at a real Traffic Sentinel host instead.
    """

    def setup(self):
        fake_host = "fake"
        fake_user = "******"
        fake_pass = "******"
        self.mock_traffic_sentinel = True

        self.traffic_sentinel = TrafficSentinel(fake_host, username=fake_user, password=fake_pass)
        # remember the real urlopen so teardown can undo the patch
        self.original_urlopen = urllib2.urlopen

    def patch_urllib(self, return_string):
        # swap urllib2.urlopen for a mock serving a canned response body
        self.traffic_sentinel_string = StringIO(return_string)
        urllib2.urlopen = Mock(return_value=self.traffic_sentinel_string)

    def teardown(self):
        urllib2.urlopen = self.original_urlopen

    def test_get_metric_statistics(self):

        # Take the host from the environment so this test also passes against
        # a real TS, which always reports its own values
        test_host = os.environ.get("TRAFFIC_SENTINEL_HOST", "fake.ts.host.tld")
        loads = [0.010, 0.020]
        test_reply = "".join("%s,%f\n" % (test_host, load) for load in loads)
        load_average = sum(loads) / float(len(loads))
        if self.mock_traffic_sentinel:
            self.patch_urllib(test_reply)

        period = 60
        start_time = datetime.now() - timedelta(days=1)
        end_time = datetime.now()

        result = self.traffic_sentinel.get_metric_statistics(
            period, start_time, end_time, "load_five", Statistics.AVERAGE)

        assert len(result) > 0
        assert result.get(test_host)
        assert result[test_host].get(Statistics.AVERAGE)

        if not self.mock_traffic_sentinel:
            return

        # with the mock in place, the average must match the canned samples
        assert abs(result[test_host][Statistics.AVERAGE] - load_average) < 0.0000001

    def test_get_metric_statistics_app_attributes(self):

        test_process = os.environ.get("TRAFFIC_SENTINEL_PROCESS", "fake.process")
        queue_length = 1
        ml = 1
        attr_line = "pid=%s&ql=%s&ml=%s" % (test_process, queue_length, ml)
        test_reply = attr_line + "\n"
        if self.mock_traffic_sentinel:
            self.patch_urllib(test_reply)

        period = 60
        start_time = datetime.now() - timedelta(days=1)
        end_time = datetime.now()
        dimensions = {"pid": [test_process]}

        result = self.traffic_sentinel.get_metric_statistics(
            period, start_time, end_time, "app_attributes:ml",
            Statistics.AVERAGE, dimensions
        )
        assert len(result) > 0
        assert result.get(test_process)
        assert result[test_process].get(Statistics.AVERAGE)

        if not self.mock_traffic_sentinel:
            return

        # with the mock in place, the average must equal the fed-in ml value
        assert abs(result[test_process][Statistics.AVERAGE] - ml) < 0.0000001

    def test_build_script(self):

        fields = ["first", "second"]
        hosts = ["somevm.cloud.tld", "someothervm.cloud.tld"]
        interval = "201209190101.01-201209200101.01"
        script = self.traffic_sentinel._build_script(
            fields, "host", interval, 60, {"hostname": hosts})
        assert 'interval = "%s"' % interval in script
        assert 'select = "%s"' % ",".join(fields) in script
        expected_where = "(%s)" % " | ".join("hostname = %s" % h for h in hosts)
        assert 'where = "%s"' % expected_where in script
Example #4
0
class SensorPolicy(IPolicy):
    """HA policy that scales the number of managed processes up or down
    based on a metric sampled from a sensor aggregator (Traffic Sentinel).
    """

    # Parameter keys accepted by the `parameters` setter, in addition to the
    # module-level _SCHEDULE_PROCESS_KWARGS.
    _SENSOR_PARAMS = ('metric', 'minimum_processes', 'maximum_processes',
        'sample_period', 'sample_function', 'cooldown_period', 'scale_up_threshold',
        'scale_up_n_processes', 'scale_down_threshold', 'scale_down_n_processes')

    def __init__(self, parameters=None, process_definition_id=None,
            schedule_process_callback=None, terminate_process_callback=None,
            process_state_callback=None,
            process_configuration=None, aggregator_config=None, *args, **kwargs):
        """Set up the Policy

        @param parameters: The parameters used by this policy to determine the
        distribution and number of VMs. This policy expects a dictionary with
        TODO

        @param process_definition_id: The process definition id to send to the
        PD on launch

        @param schedule_process_callback: A callback to schedule a process to a
        PD. Must have signature: schedule(pd_name, process_definition_id), and
        return a upid as a string

        @param terminate_process_callback: A callback to terminate a process on
        a PD. Must have signature: terminate(upid)

        @param process_state_callback: A callback to get a process state from
        a PD. Must have signature: process_state(upid)

        @param aggregator_config: configuration dict of aggregator. For traffic
        sentinel, this should look like:
          config = {
              'type': 'trafficsentinel',
              'host': 'host.name.tld',
              'port': 1235,
              'username': '******',
              'password': '******'
          }

        @raise Exception: when aggregator_config is missing or its 'type' is
        not recognized
        """

        # Fall back to no-op callbacks so the policy can be exercised standalone
        self.schedule_process = schedule_process_callback or dummy_schedule_process_callback
        self.terminate_process = terminate_process_callback or dummy_terminate_process_callback
        self.process_state = process_state_callback or dummy_process_state_callback

        self._parameters = None
        if parameters:
            # Goes through the `parameters` property setter, which validates
            self.parameters = parameters
        else:
            self._schedule_kwargs = {}

        self.process_definition_id = process_definition_id
        self.previous_all_procs = {}
        self._status = HAState.PENDING
        self.minimum_n = 1
        # datetime.min means "never scaled yet", so the first scaling action
        # is never blocked by the cooldown period
        self.last_scale_action = datetime.datetime.min

        if aggregator_config is None:
            raise Exception("Must provide an aggregator config")

        aggregator_type = aggregator_config.get('type', '').lower()
        if aggregator_type == 'trafficsentinel':
            host = aggregator_config.get('host')
            username = aggregator_config.get('username')
            password = aggregator_config.get('password')
            port = aggregator_config.get('port', 443)
            protocol = aggregator_config.get('protocol', 'https')
            self._sensor_aggregator = TrafficSentinel(host, username, password, port=port, protocol=protocol)
            self.app_metrics = self._sensor_aggregator.app_metrics
            # BUG FIX: previously assigned app_metrics here (copy-paste error);
            # assumes TrafficSentinel exposes host_metrics -- confirm
            self.host_metrics = self._sensor_aggregator.host_metrics
        else:
            raise Exception("Don't know what to do with %s aggregator type" % aggregator_type)

        if kwargs.get('name'):
            self.logprefix = "HA Agent (%s): " % kwargs['name']
        else:
            self.logprefix = ""

    @property
    def parameters(self):
        """Dictionary of policy tuning parameters, or None if never set.

        Recognized keys:

        metric: Name of Sensor Aggregator Metric to use for scaling decisions
        sample_period: Number of seconds of sample data to use (eg. if 3600,
            use sample data from 1 hour ago until present time)
        sample_function: Statistical function to apply to sampled data. Choose
            from Average, Sum, SampleCount, Maximum, Minimum
        cooldown_period: Minimum time in seconds between scale up or scale
            down actions
        scale_up_threshold: If the sampled metric is above this value, scale
            up the number of processes
        scale_up_n_processes: Number of processes to scale up by
        scale_down_threshold: If the sampled metric is below this value,
            scale down the number of processes
        scale_down_n_processes: Number of processes to scale down by
        minimum_processes: Minimum number of processes to maintain
        maximum_processes: Maximum number of processes to maintain
        """
        return self._parameters

    @parameters.setter
    def parameters(self, new_parameters):
        """Validate *new_parameters* and merge them into the current set.

        @param new_parameters: dict of parameter overrides; keys must come
        from _SCHEDULE_PROCESS_KWARGS or _SENSOR_PARAMS
        @raise PolicyError: on an unknown key or a value that fails validation
        """

        for key in new_parameters.keys():
            if key not in _SCHEDULE_PROCESS_KWARGS + self._SENSOR_PARAMS:
                raise PolicyError("%s not a valid parameter for sensor" % key)

        # Merge the new values on top of any previously-set parameters
        if self._parameters is None:
            self._parameters = {}
        parameters = dict(self._parameters)
        for key, val in new_parameters.iteritems():
            parameters[key] = val

        def _validate_int(key, positive=False):
            # Coerce parameters[key] to int, turning bad or missing values
            # into PolicyError. int(None) raises TypeError rather than
            # ValueError, hence the double catch (the original let that
            # TypeError escape unhandled).
            try:
                value = int(parameters.get(key))
                if positive and value < 0:
                    raise ValueError()
            except (TypeError, ValueError):
                desc = "a positive integer" if positive else "an integer"
                raise PolicyError("%s '%s' is not %s" % (
                    key, parameters.get(key), desc))
            parameters[key] = value

        def _validate_float(key):
            # Coerce parameters[key] to float, turning bad or missing values
            # into PolicyError (float(None) raises TypeError).
            try:
                parameters[key] = float(parameters.get(key))
            except (TypeError, ValueError):
                raise PolicyError("%s '%s' is not a floating point number" % (
                    key, parameters.get(key)))

        if parameters.get('metric') is None:
            msg = "a metric_name must be provided"
            raise PolicyError(msg)

        _validate_int('sample_period', positive=True)

        if parameters.get('sample_function') not in Statistics.ALL:
            msg = "'%s' is not a known sample_function. Choose from %s" % (
                parameters.get('sample_function'), Statistics.ALL)
            raise PolicyError(msg)

        _validate_int('cooldown_period', positive=True)
        _validate_float('scale_up_threshold')
        _validate_int('scale_up_n_processes')
        _validate_float('scale_down_threshold')
        # BUG FIX: the original error message for a bad scale_down_n_processes
        # reported the scale_up_n_processes value instead
        _validate_int('scale_down_n_processes')
        _validate_int('minimum_processes', positive=True)
        _validate_int('maximum_processes', positive=True)

        # phew!
        self._parameters = parameters
        self._schedule_kwargs = get_schedule_process_kwargs(new_parameters)

    def status(self):
        """Return the policy's current HA state (set by _set_status)."""
        return self._status

    def apply_policy(self, all_procs, managed_upids):
        """Sample the configured metric and scale managed processes up or down.

        @param all_procs: dict mapping PD name to a list of process dicts
        @param managed_upids: list of upids this policy currently manages
        @return: the (possibly filtered) list of managed upids
        @raise PolicyError: if parameters are unset or the aggregator query fails
        """

        if self._parameters is None:
            raise PolicyError("No parameters set, unable to apply policy")

        # BUG FIX: timedelta.seconds is only the seconds *component* of the
        # delta (it ignores the days part), which made this cooldown check
        # unreliable; total_seconds() is the real elapsed time.
        time_since_last_scale = datetime.datetime.now() - self.last_scale_action
        if time_since_last_scale.total_seconds() < self._parameters['cooldown_period']:
            log.debug("Returning early from apply policy because we're in cooldown")
            self._set_status(0, managed_upids)
            return managed_upids

        managed_upids = self._filter_invalid_processes(all_procs, managed_upids)

        # Get numbers from metric
        hostnames = self._get_hostnames(all_procs, managed_upids)
        period = 60
        end_time = datetime.datetime.now()  # TODO: what TZ does TS use?
        seconds = self._parameters['sample_period']
        start_time = end_time - datetime.timedelta(seconds=seconds)
        metric_name = self._parameters['metric']
        sample_function = self._parameters['sample_function']
        statistics = [sample_function, ]

        # App-level metrics are keyed by upid, host-level metrics by hostname
        if metric_name in self.app_metrics or 'app_attributes' in metric_name:
            dimensions = {'pid': managed_upids}
        else:
            dimensions = {'hostname': hostnames}
        try:
            metric_per_host = self._sensor_aggregator.get_metric_statistics(
                period, start_time, end_time, metric_name, statistics, dimensions)
        except HTTPError as h:
            msg = "Problem getting metrics from sensor aggregator with url: '%s'" % h.filename
            log.exception(msg)
            raise PolicyError(msg)

        values = []
        for host, metric_value in metric_per_host.iteritems():
            values.append(metric_value[sample_function])

        log.debug("got metrics %s for %s" % (metric_per_host, dimensions))

        try:
            # BUG FIX: use true division so integer-valued samples don't get
            # floor-divided under Python 2
            average_metric = sum(values) / float(len(values))
        except ZeroDivisionError:
            # TODO: this is really boneheaded. What we should do instead is
            # treat this situation specifically to scale to the minimum.
            # Users might want a metric that can go negative for example,
            # and this trick won't work
            average_metric = 0

        if average_metric > self._parameters['scale_up_threshold']:
            scale_by = self._parameters['scale_up_n_processes']
        elif average_metric < self._parameters['scale_down_threshold']:
            scale_by = - abs(self._parameters['scale_down_n_processes'])
        else:
            scale_by = 0

        # Clamp the desired process count between minimum and maximum
        wanted = len(managed_upids) + scale_by
        wanted = min(max(wanted, self._parameters['minimum_processes']), self._parameters['maximum_processes'])
        scale_by = wanted - len(managed_upids)

        if scale_by < 0:  # remove excess
            log.info("%sSensor policy scaling down by %s", self.logprefix, scale_by)
            scale_by = -1 * scale_by
            for to_scale in range(0, scale_by):
                # NOTE(review): managed_upids[0] is terminated on every pass
                # but never removed from the list here -- presumably the
                # terminate callback updates the managed list; verify caller
                upid = managed_upids[0]
                self.terminate_process(upid)
        elif scale_by > 0:  # Add processes
            log.info("%sSensor policy scaling up by %s", self.logprefix, scale_by)
            for to_rebalance in range(0, scale_by):
                pd_name = self._get_least_used_pd(all_procs)
                self.schedule_process(pd_name, self.process_definition_id,
                    **self._schedule_kwargs)

        if scale_by != 0:
            self.last_scale_action = datetime.datetime.now()

        self._set_status(scale_by, managed_upids)

        self.previous_all_procs = all_procs

        return managed_upids

    def _set_status(self, to_rebalance, managed_upids):
        """Update the policy's HA state after a scaling decision."""
        if self._status == HAState.FAILED:
            # FAILED is sticky: it requires human intervention to clear
            return
        if to_rebalance == 0:
            self._status = HAState.STEADY
        elif len(managed_upids) >= self.minimum_n and self._parameters['minimum_processes'] > 0:
            self._status = HAState.READY
        else:
            self._status = HAState.PENDING

    def _get_hostnames(self, all_procs, upids):
        """Return the distinct hostnames of eeagents running managed processes.

        Processes without a hostname, and processes whose upid is not in
        *upids*, are skipped.
        """
        found = []
        for procs in all_procs.values():
            for proc in procs:
                if proc['upid'] not in upids:
                    continue
                name = proc.get('hostname')
                if name is not None:
                    found.append(name)
        return list(set(found))