Example #1
    def __init__(self,
                 scenario_filename=None,
                 container_count=None,
                 user_count=None,
                 operation_count=None,
                 run_seconds=None,
                 block_size=None,
                 _scenario_data=None,
                 version=ssbench.version,
                 delete_after=None,
                 policy=None):
        """Initializes the object from a scenario file on disk.

        :scenario_filename: path to a scenario file
        """

        self.version = version
        if _scenario_data is not None:
            # This is a "private" way to construct a Scenario object from the
            # raw JSON without a file lying around.
            self._scenario_data = _scenario_data
        elif scenario_filename is not None:
            try:
                # Use a context manager so the file handle is always closed.
                with open(scenario_filename) as fp:
                    self._scenario_data = json.load(fp)
            except Exception:
                logging.exception('Error loading scenario file %r',
                                  scenario_filename)
                raise
        else:
            raise ValueError('Scenario() must get one of scenario_filename '
                             'or _scenario_data')

        # Sanity-check user_count
        if user_count is not None:
            self.user_count = user_count
        else:
            self.user_count = self._scenario_data['user_count']
        if self.user_count < 1:
            raise ValueError('user_count must be >= 1')

        # Command-line-specified values trump values in the scenario, and
        # within each of those levels, run_seconds trumps operation_count.
        if run_seconds is not None:
            self.run_seconds = run_seconds
            self.operation_count = None
        elif operation_count is not None:
            self.run_seconds = None
            self.operation_count = operation_count
        else:
            self.run_seconds = self._scenario_data.get('run_seconds', None)
            if self.run_seconds is None:
                self.operation_count = self._scenario_data.get(
                    'operation_count', None)
            else:
                self.operation_count = None

        if self.run_seconds is None and self.operation_count is None:
            raise ValueError('A scenario requires run_seconds or '
                             'operation_count')

        # storage policy to use for containers
        if policy is not None:
            self.policy = str(policy)
        else:
            self.policy = self._scenario_data.get('policy', None)
            if self.policy is not None:
                self.policy = str(self.policy)

        self.block_size = block_size
        self.name = self._scenario_data['name']
        self.container_base = self._scenario_data.get('container_base',
                                                      'ssbench')
        if container_count is not None:
            self.container_count = container_count
        else:
            self.container_count = self._scenario_data.get(
                'container_count', 100)
        policy_name = 'default_policy' if self.policy is None else self.policy
        self.containers = [
            '%s_%06d_%s' % (self.container_base, i, policy_name)
            for i in xrange(self.container_count)
        ]
        self.container_concurrency = self._scenario_data.get(
            'container_concurrency', 10)

        # Set up sizes
        self.sizes_by_name = OrderedDict()
        for size_data in self._scenario_data['sizes']:
            size_data_copy = copy.deepcopy(size_data)
            self.sizes_by_name[size_data_copy['name']] = size_data_copy
            crud_profile = size_data_copy.get(
                'crud_profile', self._scenario_data['crud_profile'])
            crud_total = sum(crud_profile)
            size_data_copy['crud_pcts'] = [
                float(c) / crud_total * 100 for c in crud_profile
            ]
            # Calculate probability thresholds for each CRUD element for this
            # object size category (defaulting to global crud profile).
            size_data_copy['crud_thresholds'] = [1, 1, 1, 1]
            self._thresholds_for(size_data_copy['crud_thresholds'], range(4),
                                 crud_profile)

        # Calculate probability thresholds for each size (from the
        # initial_files)
        self.bench_size_thresholds = OrderedDict()
        self._thresholds_for(
            self.bench_size_thresholds,
            filter(lambda n: n in self._scenario_data['initial_files'],
                   self.sizes_by_name.keys()),
            self._scenario_data['initial_files'])

        # Expiry time (in seconds) for created objects.
        if delete_after is not None:
            self.delete_after = delete_after
        else:
            self.delete_after = self._scenario_data.get('delete_after')
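
A hedged sketch of driving this constructor follows. It uses the "private"
_scenario_data path so no file is needed, and the dict contains only keys the
code above actually reads; a real ssbench scenario file may carry more fields,
so the values here are illustrative assumptions.

# Minimal scenario data, limited to keys the constructor above consumes.
scenario = Scenario(_scenario_data={
    'name': 'tiny-crud',
    'user_count': 2,
    'run_seconds': 60,
    'crud_profile': [6, 3, 1, 0],   # Create/Read/Update/Delete weights
    'sizes': [{'name': 'small', 'size_min': 4096, 'size_max': 65536}],
    'initial_files': {'small': 10},
})
print(scenario.containers[0])       # ssbench_000000_default_policy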
Example #2
    def calculate_scenario_stats(self, scenario, results, nth_pctile=95):
        """Given a list of worker job result dicts, compute various statistics.

        :results: A list of worker job result dicts
        :returns: A stats Python dict which looks like:
            SERIES_STATS = {
                'min': 1.1,
                'max': 1.1,
                'avg': 1.1,
                'std_dev': 1.1,
                'median': 1.1,
            }
            {
                'agg_stats': {
                    'worker_count': 1,
                    'start': 1.1,
                    'stop': 1.1,
                    'req_count': 1,
                    'avg_req_per_sec': 1.1, # req_count / (stop - start)?
                    'first_byte_latency': SERIES_STATS,
                    'last_byte_latency': SERIES_STATS,
                },
                'worker_stats': {
                    1: {  # keys are worker_ids
                        'start': 1.1,
                        'stop': 1.1,
                        'req_count': 1,
                        'avg_req_per_sec': 1.1, # req_count / (stop - start)?
                        'first_byte_latency': SERIES_STATS,
                        'last_byte_latency': SERIES_STATS,
                    },
                    # ...
                },
                'op_stats': {
                    CREATE_OBJECT: { # keys are CRUD constants: CREATE_OBJECT, READ_OBJECT, etc.
                        'req_count': 1, # num requests of this CRUD type
                        'avg_req_per_sec': 1.1, # total_requests / sum(last_byte_latencies)
                        'first_byte_latency': SERIES_STATS,
                        'last_byte_latency': SERIES_STATS,
                        'size_stats': {
                            'small': { # keys are size_str values
                                'req_count': 1, # num requests of this type and size
                                'avg_req_per_sec': 1.1, # total_requests / sum(last_byte_latencies)
                                'first_byte_latency': SERIES_STATS,
                                'last_byte_latency': SERIES_STATS,
                            },
                            # ...
                        },
                    },
                    # ...
                },
                'size_stats': {
                    'small': { # keys are size_str values
                        'req_count': 1, # num requests of this size (for all CRUD types)
                        'avg_req_per_sec': 1.1, # total_requests / sum(last_byte_latencies)
                        'first_byte_latency': SERIES_STATS,
                        'last_byte_latency': SERIES_STATS,
                    },
                    # ...
                },
                'time_series': {
                    'start': 1, # epoch time of first data point
                    'data': [
                        1, # number of requests finishing during this second
                        # ...
                    ],
                },
            }
        """
        # Each result looks like:
        # {
        #   'worker_id': 1,
        #   'type': 'get_object',
        #   'size': 4900000,
        #   'size_str': 'large',
        #   'first_byte_latency': 0.9137639999389648,
        #   'last_byte_latency': 0.913769006729126,
        #   'completed_at': 1324372892.360802,
        # }
        # OR
        # {
        #   'worker_id': 1,
        #   'type': 'get_object',
        #   'completed_at': 1324372892.360802,
        #   'exception': '...',
        #   'traceback': '...',
        # }
        logging.info('Calculating statistics for %d result items...',
                     len(results))
        agg_stats = dict(start=2 ** 32, stop=0, req_count=0)
        op_stats = {}
        for crud_type in [ssbench.CREATE_OBJECT, ssbench.READ_OBJECT,
                          ssbench.UPDATE_OBJECT, ssbench.DELETE_OBJECT]:
            op_stats[crud_type] = dict(
                req_count=0, avg_req_per_sec=0,
                size_stats=OrderedDict.fromkeys(scenario.sizes_by_name.keys()))

        req_completion_seconds = {}
        start_time = 0
        completion_time_max = 0
        completion_time_min = 2 ** 32
        stats = dict(
            nth_pctile=nth_pctile,
            agg_stats=agg_stats,
            worker_stats={},
            op_stats=op_stats,
            size_stats=OrderedDict.fromkeys(scenario.sizes_by_name.keys()))
        for result in results:
            if 'exception' in result:
                # skip but log exceptions
                logging.warn('calculate_scenario_stats: exception from '
                             'worker %d: %s',
                             result['worker_id'], result['exception'])
                logging.info(result['traceback'])
                continue
            completion_time = int(result['completed_at'])
            if completion_time < completion_time_min:
                completion_time_min = completion_time
                start_time = completion_time - result['last_byte_latency']
            if completion_time > completion_time_max:
                completion_time_max = completion_time
            req_completion_seconds[completion_time] = \
                1 + req_completion_seconds.get(completion_time, 0)
            result['start'] = (
                result['completed_at'] - result['last_byte_latency'])

            # Stats per-worker
            if result['worker_id'] not in stats['worker_stats']:
                stats['worker_stats'][result['worker_id']] = {}
            self._add_result_to(stats['worker_stats'][result['worker_id']],
                                result)

            # Stats per-file-size
            if not stats['size_stats'][result['size_str']]:
                stats['size_stats'][result['size_str']] = {}
            self._add_result_to(stats['size_stats'][result['size_str']],
                                result)

            self._add_result_to(agg_stats, result)
            self._add_result_to(op_stats[result['type']], result)

            # Stats per-operation-per-file-size
            if not op_stats[result['type']]['size_stats'][result['size_str']]:
                op_stats[result['type']]['size_stats'][result['size_str']] = {}
            self._add_result_to(
                op_stats[result['type']]['size_stats'][result['size_str']],
                result)
        agg_stats['worker_count'] = len(stats['worker_stats'])
        self._compute_req_per_sec(agg_stats)
        self._compute_latency_stats(agg_stats, nth_pctile)
        for worker_stats in stats['worker_stats'].values():
            self._compute_req_per_sec(worker_stats)
            self._compute_latency_stats(worker_stats, nth_pctile)
        for op_stats_dict in op_stats.itervalues():
            if op_stats_dict['req_count']:
                self._compute_req_per_sec(op_stats_dict)
                self._compute_latency_stats(op_stats_dict, nth_pctile)
                # Iterate a snapshot (.items()), since pop() below would
                # break .iteritems() by resizing the dict mid-iteration.
                for size_str, size_stats in \
                        op_stats_dict['size_stats'].items():
                    if size_stats:
                        self._compute_req_per_sec(size_stats)
                        self._compute_latency_stats(size_stats, nth_pctile)
                    else:
                        op_stats_dict['size_stats'].pop(size_str)
        # Again iterate a snapshot, since empty entries are popped below.
        for size_str, size_stats in stats['size_stats'].items():
            if size_stats:
                self._compute_req_per_sec(size_stats)
                self._compute_latency_stats(size_stats, nth_pctile)
            else:
                stats['size_stats'].pop(size_str)
        time_series_data = [req_completion_seconds.get(t, 0)
                            for t in range(completion_time_min,
                                           completion_time_max + 1)]
        stats['time_series'] = dict(start=completion_time_min,
                                    start_time=start_time,
                                    stop=completion_time_max,
                                    data=time_series_data)

        return stats
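
To make the input concrete, here is a hedged usage sketch. The result dicts
follow the two shapes documented in the comments above ('size_str' must name
one of the scenario's size categories), and "master" stands in for whatever
object this method is defined on.

results = [
    {'worker_id': 1, 'type': ssbench.READ_OBJECT, 'size': 4900000,
     'size_str': 'small', 'first_byte_latency': 0.91,
     'last_byte_latency': 0.95, 'completed_at': 1324372892.36},
    {'worker_id': 1, 'type': ssbench.CREATE_OBJECT, 'size': 8192,
     'size_str': 'small', 'first_byte_latency': 0.10,
     'last_byte_latency': 0.21, 'completed_at': 1324372893.10},
]
stats = master.calculate_scenario_stats(scenario, results, nth_pctile=95)
print(stats['agg_stats']['req_count'])  # 2, assuming _add_result_to counts each result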
Example #3
    def __init__(self, scenario_filename=None, container_count=None,
                 user_count=None, operation_count=None, run_seconds=None,
                 _scenario_data=None, version=ssbench.version):
        """Initializes the object from a scenario file on disk.

        :scenario_filename: path to a scenario file
        """

        self.version = version
        if _scenario_data is not None:
            # This is a "private" way to construct a Scenario object from the
            # raw JSON without a file lying around.
            self._scenario_data = _scenario_data
        elif scenario_filename is not None:
            try:
                # Use a context manager so the file handle is always closed.
                with open(scenario_filename) as fp:
                    self._scenario_data = json.load(fp)
            except Exception:
                logging.exception('Error loading scenario file %r',
                                  scenario_filename)
                raise
        else:
            raise ValueError('Scenario() must get one of scenario_filename '
                             'or _scenario_data')

        # Sanity-check user_count
        if user_count is not None:
            self.user_count = user_count
        else:
            self.user_count = self._scenario_data['user_count']
        if self.user_count < 1:
            raise ValueError('user_count must be >= 1')

        # Command-line-specified values trump values in the scenario, and
        # within each of those levels, run_seconds trumps operation_count.
        if run_seconds is not None:
            self.run_seconds = run_seconds
            self.operation_count = None
        elif operation_count is not None:
            self.run_seconds = None
            self.operation_count = operation_count
        else:
            self.run_seconds = self._scenario_data.get('run_seconds', None)
            if self.run_seconds is None:
                self.operation_count = self._scenario_data.get(
                    'operation_count', None)
            else:
                self.operation_count = None

        if self.run_seconds is None and self.operation_count is None:
            raise ValueError('A scenario requires run_seconds or '
                             'operation_count')

        self.name = self._scenario_data['name']
        self.container_base = self._scenario_data.get('container_base',
                                                      'ssbench')
        if container_count is not None:
            self.container_count = container_count
        else:
            self.container_count = self._scenario_data.get(
                'container_count', 100)
        self.containers = ['%s_%06d' % (self.container_base, i)
                           for i in xrange(self.container_count)]
        self.container_concurrency = self._scenario_data.get(
            'container_concurrency', 10)

        # Set up sizes
        self.sizes_by_name = OrderedDict()
        for size_data in self._scenario_data['sizes']:
            size_data_copy = copy.deepcopy(size_data)
            self.sizes_by_name[size_data_copy['name']] = size_data_copy
            crud_profile = size_data_copy.get(
                'crud_profile', self._scenario_data['crud_profile'])
            crud_total = sum(crud_profile)
            size_data_copy['crud_pcts'] = [
                float(c) / crud_total * 100 for c in crud_profile]
            # Calculate probability thresholds for each CRUD element for this
            # object size category (defaulting to global crud profile).
            size_data_copy['crud_thresholds'] = [1, 1, 1, 1]
            self._thresholds_for(size_data_copy['crud_thresholds'],
                                 range(4), crud_profile)

        # Calculate probability thresholds for each size (from the
        # initial_files)
        self.bench_size_thresholds = OrderedDict()
        self._thresholds_for(
            self.bench_size_thresholds,
            filter(lambda n: n in self._scenario_data['initial_files'],
                   self.sizes_by_name.keys()),
            self._scenario_data['initial_files'])
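
The cumulative-threshold arithmetic that _thresholds_for performs (its body
appears in the later examples) is easy to check by hand. A standalone sketch:

# With a crud_profile of [6, 3, 1, 0] the cumulative thresholds come out as
# [0.6, 0.9, 1.0, 1.0]: a uniform draw r in [0, 1) picks Create when r < 0.6,
# Read when 0.6 <= r < 0.9, Update when 0.9 <= r < 1.0, and Delete never.
def thresholds_for(weights):
    total = float(sum(weights))
    thresholds, last = [], 0.0
    for w in weights:
        last += w / total
        thresholds.append(last)
    return thresholds

print(thresholds_for([6, 3, 1, 0]))   # [0.6, 0.9, 1.0, 1.0]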
Example #4
class Scenario(object):
    """Encapsulation of a benchmark "CRUD" scenario."""
    class StopGeneratingException(Exception):
        pass

    def __init__(self,
                 scenario_filename=None,
                 container_count=None,
                 user_count=None,
                 operation_count=None,
                 run_seconds=None,
                 block_size=None,
                 _scenario_data=None,
                 version=ssbench.version,
                 delete_after=None,
                 policy=None):
        """Initializes the object from a scenario file on disk.

        :scenario_filename: path to a scenario file
        """

        self.version = version
        if _scenario_data is not None:
            # This is a "private" way to construct a Scenario object from the
            # raw JSON without a file lying around.
            self._scenario_data = _scenario_data
        elif scenario_filename is not None:
            try:
                # Use a context manager so the file handle is always closed.
                with open(scenario_filename) as fp:
                    self._scenario_data = json.load(fp)
            except Exception:
                logging.exception('Error loading scenario file %r',
                                  scenario_filename)
                raise
        else:
            raise ValueError('Scenario() must get one of scenario_filename '
                             'or _scenario_data')

        # Sanity-check user_count
        if user_count is not None:
            self.user_count = user_count
        else:
            self.user_count = self._scenario_data['user_count']
        if self.user_count < 1:
            raise ValueError('user_count must be >= 1')

        # Command-line-specified values trump values in the scenario, and
        # within each of those levels, run_seconds trumps operation_count.
        if run_seconds is not None:
            self.run_seconds = run_seconds
            self.operation_count = None
        elif operation_count is not None:
            self.run_seconds = None
            self.operation_count = operation_count
        else:
            self.run_seconds = self._scenario_data.get('run_seconds', None)
            if self.run_seconds is None:
                self.operation_count = self._scenario_data.get(
                    'operation_count', None)
            else:
                self.operation_count = None

        if self.run_seconds is None and self.operation_count is None:
            raise ValueError('A scenario requires run_seconds or '
                             'operation_count')

        # storage policy to use for containers
        if policy is not None:
            self.policy = str(policy)
        else:
            self.policy = self._scenario_data.get('policy', None)
            if self.policy is not None:
                self.policy = str(self.policy)

        self.block_size = block_size
        self.name = self._scenario_data['name']
        self.container_base = self._scenario_data.get('container_base',
                                                      'ssbench')
        if container_count is not None:
            self.container_count = container_count
        else:
            self.container_count = self._scenario_data.get(
                'container_count', 100)
        policy_name = 'default_policy' if self.policy is None else self.policy
        self.containers = [
            '%s_%06d_%s' % (self.container_base, i, policy_name)
            for i in xrange(self.container_count)
        ]
        self.container_concurrency = self._scenario_data.get(
            'container_concurrency', 10)

        # Set up sizes
        self.sizes_by_name = OrderedDict()
        for size_data in self._scenario_data['sizes']:
            size_data_copy = copy.deepcopy(size_data)
            self.sizes_by_name[size_data_copy['name']] = size_data_copy
            crud_profile = size_data_copy.get(
                'crud_profile', self._scenario_data['crud_profile'])
            crud_total = sum(crud_profile)
            size_data_copy['crud_pcts'] = [
                float(c) / crud_total * 100 for c in crud_profile
            ]
            # Calculate probability thresholds for each CRUD element for this
            # object size category (defaulting to global crud profile).
            size_data_copy['crud_thresholds'] = [1, 1, 1, 1]
            self._thresholds_for(size_data_copy['crud_thresholds'], range(4),
                                 crud_profile)

        # Calculate probability thresholds for each size (from the
        # initial_files)
        self.bench_size_thresholds = OrderedDict()
        self._thresholds_for(
            self.bench_size_thresholds,
            filter(lambda n: n in self._scenario_data['initial_files'],
                   self.sizes_by_name.keys()),
            self._scenario_data['initial_files'])

        # Expiry time (in seconds) for created objects.
        if delete_after is not None:
            self.delete_after = delete_after
        else:
            self.delete_after = self._scenario_data.get('delete_after')

    def packb(self):
        return msgpack.packb({
            '_scenario_data': self._scenario_data,
            'name': self.name,
            'version': self.version,
            'user_count': self.user_count,
            'operation_count': self.operation_count,
            'run_seconds': self.run_seconds,
            'container_base': self.container_base,
            'container_count': self.container_count,
            'container_concurrency': self.container_concurrency,
            'delete_after': self.delete_after,
        })

    @classmethod
    def unpackb(cls, packed_or_unpacker):
        if isinstance(packed_or_unpacker, msgpack.Unpacker):
            data = packed_or_unpacker.next()
        else:
            data = msgpack.unpackb(packed_or_unpacker)
        scenario = cls(container_count=data['container_count'],
                       user_count=data['user_count'],
                       operation_count=data['operation_count'],
                       run_seconds=data['run_seconds'],
                       version=data['version'],
                       _scenario_data=data['_scenario_data'],
                       delete_after=data.get('delete_after'))
        return scenario

    @property
    def crud_pcts(self):
        total = sum(self._scenario_data['crud_profile'])
        return [
            float(c) / total * 100 for c in self._scenario_data['crud_profile']
        ]

    def _thresholds_for(self, target, indices, data):
        initial_sum = sum(map(lambda i: data[i], indices))
        last = 0
        for idx in indices:
            last = last + float(data[idx]) / initial_sum
            target[idx] = last

    def job(self, size_str, **kwargs):
        job = {'size_str': size_str}
        job.update(kwargs)
        return job

    def create_job(self, size_str, i, container=None, head_first=False):
        """
        Creates a job dict which will create an object.
        """

        if container is None:
            container = random.choice(self.containers)

        return self.job(size_str,
                        type=ssbench.CREATE_OBJECT,
                        container=container,
                        name='%s_%06d' % (size_str, i),
                        size=random.randint(
                            self.sizes_by_name[size_str]['size_min'],
                            self.sizes_by_name[size_str]['size_max']),
                        block_size=self.block_size,
                        head_first=head_first,
                        delete_after=self.delete_after)

    def bench_job(self, size_str, crud_index, i):
        """Creates a benchmark work job dict of a given size and crud "index"
        (where 0 is Create, 1 is Read, etc.).

        :size_str: One of the size strings defined in the scenario file
        :crud_index: An index into the CRUD array (0 is Create, etc.)
        :i: The job index
        :returns: A dictionary representing a benchmark work job
        """

        if crud_index == 0:
            return self.create_job(size_str, i)
        elif crud_index == 1:
            return self.job(size_str,
                            type=ssbench.READ_OBJECT,
                            block_size=self.block_size)
        elif crud_index == 2:
            return self.job(size_str,
                            type=ssbench.UPDATE_OBJECT,
                            block_size=self.block_size,
                            size=random.randint(
                                self.sizes_by_name[size_str]['size_min'],
                                self.sizes_by_name[size_str]['size_max']))
        elif crud_index == 3:
            return self.job(size_str, type=ssbench.DELETE_OBJECT)

    def initial_jobs(self):
        """
        Generator for the worker jobs necessary to initialize the cluster
        contents for the scenario.

        :returns: A generator which yields job objects (dicts)
        """

        count_by_size = copy.copy(self._scenario_data['initial_files'])
        index_per_size = dict.fromkeys(count_by_size.iterkeys(), 1)
        container_iter = itertools.cycle(self.containers)

        yielded = True
        while yielded:
            yielded = False
            for size_str in filter(
                    lambda n: n in self._scenario_data['initial_files'],
                    self.sizes_by_name.keys()):
                if count_by_size[size_str]:
                    yield self.create_job(size_str,
                                          index_per_size[size_str],
                                          container=container_iter.next(),
                                          head_first=True)
                    count_by_size[size_str] -= 1
                    index_per_size[size_str] += 1
                    yielded = True

    def bench_jobs(self):
        """
        Generator for the worker jobs necessary to actually run the scenario.

        If self.run_seconds is set, jobs will be for about that many seconds,
        regardless of any value for self.operation_count.

        If self.run_seconds is not set, exactly self.operation_count jobs will
        be yielded.

        :returns: A generator which yields job objects (dicts)
        """

        max_index_size = max(self._scenario_data['initial_files'].itervalues())

        keep_running = [True]
        prev_alarm = None
        if self.run_seconds:

            def _stop_running(signal, frame):
                signal = signal  # appease the linter
                frame = frame  # appease the linter
                keep_running[0] = False

            prev_alarm = signal.signal(signal.SIGALRM, _stop_running)
            signal.alarm(self.run_seconds)

        index = max_index_size + 1
        yielded = 0
        while (self.run_seconds and keep_running[0]) or \
                yielded < self.operation_count:
            r = random.random()  # uniform on [0, 1)
            for size_str, prob in self.bench_size_thresholds.iteritems():
                if r < prob:
                    this_size_str = size_str
                    break
            # Determine which C/R/U/D type this job will be
            size_crud = self.sizes_by_name[this_size_str]['crud_thresholds']
            r = random.random()  # uniform on [0, 1)
            for crud_index, prob in enumerate(size_crud):
                if r < prob:
                    this_crud_index = crud_index
                    break

            yield self.bench_job(this_size_str, this_crud_index, index)

            index += 1
            yielded += 1

        if prev_alarm:
            # Deliberately avoiding the complexity of trying to handle a
            # pre-existing alarm timer value, since that shouldn't be
            # necessary for all known applications of Scenario.
            signal.signal(signal.SIGALRM, prev_alarm)
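
A short round-trip sketch for the msgpack serialization above; "scenario" is
assumed to be a Scenario built as in the earlier examples. Note that packb()
ships the raw _scenario_data plus the resolved settings, so unpackb() can
reconstruct an equivalent object without re-reading the scenario file.

packed = scenario.packb()               # a msgpack-encoded map (bytes)
restored = Scenario.unpackb(packed)     # also accepts a msgpack.Unpacker
assert restored.name == scenario.name
assert restored.user_count == scenario.user_count
assert restored.delete_after == scenario.delete_after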
Example #5
class Scenario(object):
    """Encapsulation of a benchmark "CRUD" scenario."""

    class StopGeneratingException(Exception):
        pass

    def __init__(self, scenario_filename=None, container_count=None,
                 user_count=None, operation_count=None, run_seconds=None,
                 _scenario_data=None, version=ssbench.version):
        """Initializes the object from a scenario file on disk.

        :scenario_filename: path to a scenario file
        """

        self.version = version
        if _scenario_data is not None:
            # This is a "private" way to construct a Scenario object from the
            # raw JSON without a file lying around.
            self._scenario_data = _scenario_data
        elif scenario_filename is not None:
            try:
                # Use a context manager so the file handle is always closed.
                with open(scenario_filename) as fp:
                    self._scenario_data = json.load(fp)
            except Exception:
                logging.exception('Error loading scenario file %r',
                                  scenario_filename)
                raise
        else:
            raise ValueError('Scenario() must get one of scenario_filename '
                             'or _scenario_data')

        # Sanity-check user_count
        if user_count is not None:
            self.user_count = user_count
        else:
            self.user_count = self._scenario_data['user_count']
        if self.user_count < 1:
            raise ValueError('user_count must be >= 1')

        # Command-line-specified values trump values in the scenario, and
        # within each of those levels, run_seconds trumps operation_count.
        if run_seconds is not None:
            self.run_seconds = run_seconds
            self.operation_count = None
        elif operation_count is not None:
            self.run_seconds = None
            self.operation_count = operation_count
        else:
            self.run_seconds = self._scenario_data.get('run_seconds', None)
            if self.run_seconds is None:
                self.operation_count = self._scenario_data.get(
                    'operation_count', None)
            else:
                self.operation_count = None

        if self.run_seconds is None and self.operation_count is None:
            raise ValueError('A scenario requires run_seconds or '
                             'operation_count')

        self.name = self._scenario_data['name']
        self.container_base = self._scenario_data.get('container_base',
                                                      'ssbench')
        if container_count is not None:
            self.container_count = container_count
        else:
            self.container_count = self._scenario_data.get(
                'container_count', 100)
        self.containers = ['%s_%06d' % (self.container_base, i)
                           for i in xrange(self.container_count)]
        self.container_concurrency = self._scenario_data.get(
            'container_concurrency', 10)

        # Set up sizes
        self.sizes_by_name = OrderedDict()
        for size_data in self._scenario_data['sizes']:
            size_data_copy = copy.deepcopy(size_data)
            self.sizes_by_name[size_data_copy['name']] = size_data_copy
            crud_profile = size_data_copy.get(
                'crud_profile', self._scenario_data['crud_profile'])
            crud_total = sum(crud_profile)
            size_data_copy['crud_pcts'] = [
                float(c) / crud_total * 100 for c in crud_profile]
            # Calculate probability thresholds for each CRUD element for this
            # object size category (defaulting to global crud profile).
            size_data_copy['crud_thresholds'] = [1, 1, 1, 1]
            self._thresholds_for(size_data_copy['crud_thresholds'],
                                 range(4), crud_profile)

        # Calculate probability thresholds for each size (from the
        # initial_files)
        self.bench_size_thresholds = OrderedDict()
        self._thresholds_for(
            self.bench_size_thresholds,
            filter(lambda n: n in self._scenario_data['initial_files'],
                   self.sizes_by_name.keys()),
            self._scenario_data['initial_files'])

    def packb(self):
        return msgpack.packb({
            '_scenario_data': self._scenario_data,
            'name': self.name,
            'version': self.version,
            'user_count': self.user_count,
            'operation_count': self.operation_count,
            'run_seconds': self.run_seconds,
            'container_base': self.container_base,
            'container_count': self.container_count,
            'container_concurrency': self.container_concurrency,
        })

    @classmethod
    def unpackb(cls, packed_or_unpacker):
        if isinstance(packed_or_unpacker, msgpack.Unpacker):
            data = packed_or_unpacker.next()
        else:
            data = msgpack.unpackb(packed_or_unpacker)
        scenario = cls(container_count=data['container_count'],
                       user_count=data['user_count'],
                       operation_count=data['operation_count'],
                       run_seconds=data['run_seconds'],
                       version=data['version'],
                       _scenario_data=data['_scenario_data'])
        return scenario

    @property
    def crud_pcts(self):
        total = sum(self._scenario_data['crud_profile'])
        return [float(c) / total * 100
                for c in self._scenario_data['crud_profile']]

    def _thresholds_for(self, target, indices, data):
        initial_sum = sum(map(lambda i: data[i], indices))
        last = 0
        for idx in indices:
            last = last + float(data[idx]) / initial_sum
            target[idx] = last

    def job(self, size_str, **kwargs):
        job = {'size_str': size_str}
        job.update(kwargs)
        return job

    def create_job(self, size_str, i):
        """
        Creates a job dict which will create an object.
        """

        return self.job(size_str,
                        type=ssbench.CREATE_OBJECT,
                        container=random.choice(self.containers),
                        name='%s_%06d' % (size_str, i),
                        size=random.randint(
                            self.sizes_by_name[size_str]['size_min'],
                            self.sizes_by_name[size_str]['size_max']))

    def bench_job(self, size_str, crud_index, i):
        """Creates a benchmark work job dict of a given size and crud "index"
        (where 0 is Create, 1 is Read, etc.).

        :size_str: One of the size strings defined in the scenario file
        :crud_index: An index into the CRUD array (0 is Create, etc.)
        :i: The job index
        :returns: A dictionary representing a benchmark work job
        """

        if crud_index == 0:
            return self.create_job(size_str, i)
        elif crud_index == 1:
            return self.job(size_str, type=ssbench.READ_OBJECT)
        elif crud_index == 2:
            return self.job(
                size_str, type=ssbench.UPDATE_OBJECT,
                size=random.randint(
                    self.sizes_by_name[size_str]['size_min'],
                    self.sizes_by_name[size_str]['size_max']))
        elif crud_index == 3:
            return self.job(size_str, type=ssbench.DELETE_OBJECT)

    def initial_jobs(self):
        """
        Generator for the worker jobs necessary to initialize the cluster
        contents for the scenario.

        :returns: A generator which yields job objects (dicts)
        """

        count_by_size = copy.copy(self._scenario_data['initial_files'])
        index_per_size = dict.fromkeys(count_by_size.iterkeys(), 1)

        yielded = True
        while yielded:
            yielded = False
            for size_str in filter(
                    lambda n: n in self._scenario_data['initial_files'],
                    self.sizes_by_name.keys()):
                if count_by_size[size_str]:
                    yield self.create_job(size_str, index_per_size[size_str])
                    count_by_size[size_str] -= 1
                    index_per_size[size_str] += 1
                    yielded = True

    def bench_jobs(self):
        """
        Generator for the worker jobs necessary to actually run the scenario.

        If self.run_seconds is set, jobs will be for about that many seconds,
        regardless of any value for self.operation_count.

        If self.run_seconds is not set, exactly self.operation_count jobs will
        be yielded.

        :returns: A generator which yields job objects (dicts)
        """

        max_index_size = max(self._scenario_data['initial_files'].itervalues())

        keep_running = [True]
        prev_alarm = None
        if self.run_seconds:
            def _stop_running(signal, frame):
                keep_running[0] = False
            prev_alarm = signal.signal(signal.SIGALRM, _stop_running)
            signal.alarm(self.run_seconds)

        index = max_index_size + 1
        yielded = 0
        while (self.run_seconds and keep_running[0]) or \
                yielded < self.operation_count:
            r = random.random()  # uniform on [0, 1)
            for size_str, prob in self.bench_size_thresholds.iteritems():
                if r < prob:
                    this_size_str = size_str
                    break
            # Determine which C/R/U/D type this job will be
            size_crud = self.sizes_by_name[this_size_str]['crud_thresholds']
            r = random.random()  # uniform on [0, 1)
            for crud_index, prob in enumerate(size_crud):
                if r < prob:
                    this_crud_index = crud_index
                    break

            yield self.bench_job(this_size_str, this_crud_index, index)

            index += 1
            yielded += 1

        if prev_alarm:
            # Deliberately avoiding the complexity of trying to handle a
            # pre-existing alarm timer value, since that shouldn't be
            # necessary for all known applications of Scenario.
            signal.signal(signal.SIGALRM, prev_alarm)
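
The run_seconds handling in bench_jobs() relies on a Unix SIGALRM timer that
flips a flag checked by the generator loop. A minimal standalone sketch of
that pattern (Unix-only), independent of ssbench:

import signal
import time

keep_running = [True]

def _stop_running(signum, frame):
    keep_running[0] = False        # just flip the flag; the loop exits itself

prev_alarm = signal.signal(signal.SIGALRM, _stop_running)
signal.alarm(2)                    # deliver SIGALRM in roughly 2 seconds
jobs = 0
while keep_running[0]:
    jobs += 1                      # stand-in for yielding one benchmark job
    time.sleep(0.1)
signal.signal(signal.SIGALRM, prev_alarm)   # restore the previous handler
print('produced %d jobs in ~2 seconds' % jobs)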
Example #6
    def calculate_scenario_stats(self, nth_pctile=95, format_numbers=True):
        """Compute various statistics from worker job result dicts.

        :param nth_pctile: Use this percentile when calculating the stats
        :param format_numbers: Whether various floating-point numbers should
            be formatted as strings or left as full-precision floats
        :returns: A stats Python dict which looks something like:
            SERIES_STATS = {
                'min': 1.1,
                'max': 1.1,
                'avg': 1.1,
                'std_dev': 1.1,
                'median': 1.1,
            }
            {
                'agg_stats': {
                    'worker_count': 1,
                    'start': 1.1,
                    'stop': 1.1,
                    'req_count': 1,
                    'retries': 0,
                    'errors': 0,
                    'avg_req_per_sec': 1.1, # req_count / (stop - start)?
                    'retry_rate': 0.0,
                    'first_byte_latency': SERIES_STATS,
                    'last_byte_latency': SERIES_STATS,
                },
                'worker_stats': {
                    1: {  # keys are worker_ids
                        'start': 1.1,
                        'stop': 1.1,
                        'req_count': 1,
                        'retries': 0,
                        'retry_rate': 0.0,
                        'errors': 0,
                        'avg_req_per_sec': 1.1, # req_count / (stop - start)?
                        'first_byte_latency': SERIES_STATS,
                        'last_byte_latency': SERIES_STATS,
                    },
                    # ...
                },
                'op_stats': {
                    CREATE_OBJECT: { # keys are CRUD constants: CREATE_OBJECT, READ_OBJECT, etc.
                        'req_count': 1, # num requests of this CRUD type
                        'avg_req_per_sec': 1.1, # total_requests / sum(last_byte_latencies)
                        'first_byte_latency': SERIES_STATS,
                        'last_byte_latency': SERIES_STATS,
                        'size_stats': {
                            'small': { # keys are size_str values
                                'req_count': 1, # num requests of this type and size
                                'retries': 0, # num of retries
                                'avg_req_per_sec': 1.1, # total_requests / sum(last_byte_latencies)
                                'errors': 0,
                                'retry_rate': 0.0,
                                'first_byte_latency': SERIES_STATS,
                                'last_byte_latency': SERIES_STATS,
                            },
                            # ...
                        },
                    },
                    # ...
                },
                'size_stats': {
                    'small': { # keys are size_str values
                        'req_count': 1, # num requests of this size (for all CRUD types)
                        'retries': 0, # num of retries
                        'acutual_request_count': 1, # num requests, including retries
                        'avg_req_per_sec': 1.1, # total_requests / sum(last_byte_latencies)
                        'errors': 0,
                        'retry_rate': 0.0,
                        'first_byte_latency': SERIES_STATS,
                        'last_byte_latency': SERIES_STATS,
                    },
                    # ...
                },
                'time_series': {
                    'start': 1, # epoch time of first data point
                    'data': [
                        1, # number of requests finishing during this second
                        # ...
                    ],
                },
            }
        """
        # Each result looks like:
        # {
        #   'worker_id': 1,
        #   'type': 'get_object',
        #   'size': 4900000,
        #   'size_str': 'large',
        #   'first_byte_latency': 0.9137639999389648,
        #   'last_byte_latency': 0.913769006729126,
        #   'retries': 1,
        #   'completed_at': 1324372892.360802,
        # }
        # OR
        # {
        #   'worker_id': 1,
        #   'type': 'get_object',
        #   'size_str': 'large',
        #   'completed_at': 1324372892.360802,
        #   'retries': 1,
        #   'exception': '...',
        # }
        logging.info('Calculating statistics...')
        agg_stats = dict(start=2**32, stop=0, req_count=0)
        op_stats = {}
        for crud_type in [
                ssbench.CREATE_OBJECT, ssbench.READ_OBJECT,
                ssbench.UPDATE_OBJECT, ssbench.DELETE_OBJECT
        ]:
            op_stats[crud_type] = dict(req_count=0,
                                       avg_req_per_sec=0,
                                       size_stats=OrderedDict.fromkeys(
                                           self.scenario.sizes_by_name.keys()))

        req_completion_seconds = {}
        start_time = 0
        completion_time_max = 0
        completion_time_min = 2**32
        stats = dict(nth_pctile=nth_pctile,
                     agg_stats=agg_stats,
                     worker_stats={},
                     op_stats=op_stats,
                     size_stats=OrderedDict.fromkeys(
                         self.scenario.sizes_by_name.keys()))
        for results in self.unpacker:
            skipped = 0
            for result in results:
                try:
                    res_completed_at = result['completed_at']
                    res_completion_time = int(res_completed_at)
                    res_worker_id = result['worker_id']
                    res_type = result['type']
                    res_size_str = result['size_str']
                except KeyError as err:
                    logging.info('Skipped result with missing keys (%r): %r',
                                 err, result)
                    skipped += 1
                    continue

                try:
                    res_exception = result['exception']
                except KeyError:
                    try:
                        res_last_byte_latency = result['last_byte_latency']
                    except KeyError:
                        logging.info(
                            'Skipped result with missing'
                            ' last_byte_latency key: %r', result)
                        skipped += 1
                        continue
                    if res_completion_time < completion_time_min:
                        completion_time_min = res_completion_time
                        start_time = (res_completion_time -
                                      res_last_byte_latency)
                    if res_completion_time > completion_time_max:
                        completion_time_max = res_completion_time
                    req_completion_seconds[res_completion_time] = \
                        1 + req_completion_seconds.get(res_completion_time, 0)
                    result['start'] = res_completed_at - res_last_byte_latency
                else:
                    # log exceptions reported by the worker
                    logging.warn(
                        'calculate_scenario_stats: exception from '
                        'worker %d: %s', res_worker_id, res_exception)
                    try:
                        res_traceback = result['traceback']
                    except KeyError:
                        logging.warn('traceback missing')
                    else:
                        logging.info(res_traceback)

                # Stats per-worker
                if res_worker_id not in stats['worker_stats']:
                    stats['worker_stats'][res_worker_id] = {}
                self._add_result_to(stats['worker_stats'][res_worker_id],
                                    result)

                # Stats per-file-size
                try:
                    val = stats['size_stats'][res_size_str]
                except KeyError:
                    stats['size_stats'][res_size_str] = {}
                else:
                    if not val:
                        stats['size_stats'][res_size_str] = {}
                self._add_result_to(stats['size_stats'][res_size_str], result)

                self._add_result_to(agg_stats, result)

                type_stats = op_stats[res_type]
                self._add_result_to(type_stats, result)

                # Stats per-operation-per-file-size
                try:
                    val = type_stats['size_stats'][res_size_str]
                except KeyError:
                    type_stats['size_stats'][res_size_str] = {}
                else:
                    if not val:
                        type_stats['size_stats'][res_size_str] = {}
                self._add_result_to(type_stats['size_stats'][res_size_str],
                                    result)
            if skipped > 0:
                logging.warn("Total number of results skipped: %d", skipped)

        agg_stats['worker_count'] = len(stats['worker_stats'])
        self._compute_req_per_sec(agg_stats)
        self._compute_retry_rate(agg_stats)
        self._compute_latency_stats(agg_stats, nth_pctile, format_numbers)

        jobs_per_worker = []
        for worker_stats in stats['worker_stats'].values():
            jobs_per_worker.append(worker_stats['req_count'])
            self._compute_req_per_sec(worker_stats)
            self._compute_retry_rate(worker_stats)
            self._compute_latency_stats(worker_stats, nth_pctile,
                                        format_numbers)
        stats['jobs_per_worker_stats'] = self._series_stats(
            jobs_per_worker, nth_pctile, format_numbers)
        logging.debug('Jobs per worker stats:\n' +
                      pformat(stats['jobs_per_worker_stats']))

        for op_stats_dict in op_stats.itervalues():
            if op_stats_dict['req_count']:
                self._compute_req_per_sec(op_stats_dict)
                self._compute_retry_rate(op_stats_dict)
                self._compute_latency_stats(op_stats_dict, nth_pctile,
                                            format_numbers)
                # Iterate a snapshot (.items()), since pop() below would
                # break .iteritems() by resizing the dict mid-iteration.
                for size_str, size_stats in \
                        op_stats_dict['size_stats'].items():
                    if size_stats:
                        self._compute_req_per_sec(size_stats)
                        self._compute_retry_rate(size_stats)
                        self._compute_latency_stats(size_stats, nth_pctile,
                                                    format_numbers)
                    else:
                        op_stats_dict['size_stats'].pop(size_str)
        # Again iterate a snapshot, since empty entries are popped below.
        for size_str, size_stats in stats['size_stats'].items():
            if size_stats:
                self._compute_req_per_sec(size_stats)
                self._compute_retry_rate(size_stats)
                self._compute_latency_stats(size_stats, nth_pctile,
                                            format_numbers)
            else:
                stats['size_stats'].pop(size_str)
        time_series_data = [
            req_completion_seconds.get(t, 0)
            for t in range(completion_time_min, completion_time_max + 1)
        ]
        stats['time_series'] = dict(start=completion_time_min,
                                    start_time=start_time,
                                    stop=completion_time_max,
                                    data=time_series_data)

        return stats
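
Examples #6 and #7 read results as a stream: self.unpacker is assumed to be a
msgpack.Unpacker that yields one list of result dicts per packed batch fed
into it. A hedged sketch of wiring that up:

import msgpack

unpacker = msgpack.Unpacker()        # incremental, feed()-driven decoder
batch = [{'worker_id': 1, 'type': 'get_object', 'size_str': 'small',
          'first_byte_latency': 0.10, 'last_byte_latency': 0.20,
          'completed_at': 1324372892.36}]
unpacker.feed(msgpack.packb(batch))  # in practice: feed chunks from a file
for results in unpacker:
    print(len(results))              # -> 1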
Example #7
    def calculate_scenario_stats(self, nth_pctile=95, format_numbers=True):
        """Compute various statistics from worker job result dicts.

        :param nth_pctile: Use this percentile when calculating the stats
        :param format_numbers: Whether various floating-point numbers should
            be formatted as strings or left as full-precision floats
        :returns: A stats Python dict which looks something like:
            SERIES_STATS = {
                'min': 1.1,
                'max': 1.1,
                'avg': 1.1,
                'std_dev': 1.1,
                'median': 1.1,
            }
            {
                'agg_stats': {
                    'worker_count': 1,
                    'start': 1.1,
                    'stop': 1.1,
                    'req_count': 1,
                    'retries': 0,
                    'errors': 0,
                    'avg_req_per_sec': 1.1, # req_count / (stop - start)?
                    'retry_rate': 0.0,
                    'first_byte_latency': SERIES_STATS,
                    'last_byte_latency': SERIES_STATS,
                },
                'worker_stats': {
                    1: {  # keys are worker_ids
                        'start': 1.1,
                        'stop': 1.1,
                        'req_count': 1,
                        'retries': 0,
                        'retry_rate': 0.0,
                        'errors': 0,
                        'avg_req_per_sec': 1.1, # req_count / (stop - start)?
                        'first_byte_latency': SERIES_STATS,
                        'last_byte_latency': SERIES_STATS,
                    },
                    # ...
                },
                'op_stats': {
                    CREATE_OBJECT: { # keys are CRUD constants: CREATE_OBJECT, READ_OBJECT, etc.
                        'req_count': 1, # num requests of this CRUD type
                        'avg_req_per_sec': 1.1, # total_requests / sum(last_byte_latencies)
                        'first_byte_latency': SERIES_STATS,
                        'last_byte_latency': SERIES_STATS,
                        'size_stats': {
                            'small': { # keys are size_str values
                                'req_count': 1, # num requests of this type and size
                                'retries': 0, # num of retries
                                'avg_req_per_sec': 1.1, # total_requests / sum(last_byte_latencies)
                                'errors': 0,
                                'retry_rate': 0.0,
                                'first_byte_latency': SERIES_STATS,
                                'last_byte_latency': SERIES_STATS,
                            },
                            # ...
                        },
                    },
                    # ...
                },
                'size_stats': {
                    'small': { # keys are size_str values
                        'req_count': 1, # num requests of this size (for all CRUD types)
                        'retries': 0, # num of retries
                        'acutual_request_count': 1, # num requests, including retries
                        'avg_req_per_sec': 1.1, # total_requests / sum(last_byte_latencies)
                        'errors': 0,
                        'retry_rate': 0.0,
                        'first_byte_latency': SERIES_STATS,
                        'last_byte_latency': SERIES_STATS,
                    },
                    # ...
                },
                'time_series': {
                    'start': 1, # epoch time of first data point
                    'data': [
                        1, # number of requests finishing during this second
                        # ...
                    ],
                },
            }
        """
        # Each result looks like:
        # {
        #   'worker_id': 1,
        #   'type': 'get_object',
        #   'size': 4900000,
        #   'size_str': 'large',
        #   'first_byte_latency': 0.9137639999389648,
        #   'last_byte_latency': 0.913769006729126,
        #   'retries': 1,
        #   'completed_at': 1324372892.360802,
        # }
        # OR
        # {
        #   'worker_id': 1,
        #   'type': 'get_object',
        #   'size_str': 'large',
        #   'completed_at': 1324372892.360802,
        #   'retries': 1,
        #   'exception': '...',
        # }
        logging.info('Calculating statistics...')
        agg_stats = dict(start=2 ** 32, stop=0, req_count=0)
        op_stats = {}
        for crud_type in [ssbench.CREATE_OBJECT, ssbench.READ_OBJECT,
                          ssbench.UPDATE_OBJECT, ssbench.DELETE_OBJECT]:
            op_stats[crud_type] = dict(
                req_count=0, avg_req_per_sec=0,
                size_stats=OrderedDict.fromkeys(
                    self.scenario.sizes_by_name.keys()))

        req_completion_seconds = {}
        start_time = 0
        completion_time_max = 0
        completion_time_min = 2 ** 32
        stats = dict(
            nth_pctile=nth_pctile,
            agg_stats=agg_stats,
            worker_stats={},
            op_stats=op_stats,
            size_stats=OrderedDict.fromkeys(
                self.scenario.sizes_by_name.keys()))
        for results in self.unpacker:
            skipped = 0
            for result in results:
                try:
                    res_completed_at = result['completed_at']
                    res_completion_time = int(res_completed_at)
                    res_worker_id = result['worker_id']
                    res_type = result['type']
                    res_size_str = result['size_str']
                except KeyError as err:
                    logging.info('Skipped result with missing keys (%r): %r',
                                 err, result)
                    skipped += 1
                    continue

                try:
                    res_exception = result['exception']
                except KeyError:
                    try:
                        res_last_byte_latency = result['last_byte_latency']
                    except KeyError:
                        logging.info('Skipped result with missing'
                                     ' last_byte_latency key: %r',
                                     result)
                        skipped += 1
                        continue
                    if res_completion_time < completion_time_min:
                        completion_time_min = res_completion_time
                        start_time = (
                            res_completion_time - res_last_byte_latency)
                    if res_completion_time > completion_time_max:
                        completion_time_max = res_completion_time
                    req_completion_seconds[res_completion_time] = \
                        1 + req_completion_seconds.get(res_completion_time, 0)
                    result['start'] = res_completed_at - res_last_byte_latency
                else:
                    # log exceptions reported back by workers
                    logging.warn('calculate_scenario_stats: exception from '
                                 'worker %d: %s',
                                 res_worker_id, res_exception)
                    try:
                        res_traceback = result['traceback']
                    except KeyError:
                        logging.warn('traceback missing')
                    else:
                        logging.info(res_traceback)

                # Stats per-worker
                if res_worker_id not in stats['worker_stats']:
                    stats['worker_stats'][res_worker_id] = {}
                self._add_result_to(stats['worker_stats'][res_worker_id],
                                    result)

                # Stats per-file-size
                try:
                    val = stats['size_stats'][res_size_str]
                except KeyError:
                    stats['size_stats'][res_size_str] = {}
                else:
                    if not val:
                        stats['size_stats'][res_size_str] = {}
                self._add_result_to(stats['size_stats'][res_size_str],
                                    result)

                self._add_result_to(agg_stats, result)

                type_stats = op_stats[res_type]
                self._add_result_to(type_stats, result)

                # Stats per-operation-per-file-size
                try:
                    val = type_stats['size_stats'][res_size_str]
                except KeyError:
                    type_stats['size_stats'][res_size_str] = {}
                else:
                    if not val:
                        type_stats['size_stats'][res_size_str] = {}
                self._add_result_to(
                    type_stats['size_stats'][res_size_str], result)
            if skipped > 0:
                logging.warn("Total number of results skipped: %d", skipped)

        agg_stats['worker_count'] = len(stats['worker_stats'].keys())
        self._compute_req_per_sec(agg_stats)
        self._compute_retry_rate(agg_stats)
        self._compute_latency_stats(agg_stats, nth_pctile, format_numbers)

        jobs_per_worker = []
        for worker_stats in stats['worker_stats'].values():
            jobs_per_worker.append(worker_stats['req_count'])
            self._compute_req_per_sec(worker_stats)
            self._compute_retry_rate(worker_stats)
            self._compute_latency_stats(worker_stats, nth_pctile,
                                        format_numbers)
        stats['jobs_per_worker_stats'] = self._series_stats(jobs_per_worker,
                                                            nth_pctile,
                                                            format_numbers)
        logging.debug('Jobs per worker stats:\n' +
                      pformat(stats['jobs_per_worker_stats']))

        for op_stats_dict in op_stats.itervalues():
            if op_stats_dict['req_count']:
                self._compute_req_per_sec(op_stats_dict)
                self._compute_retry_rate(op_stats_dict)
                self._compute_latency_stats(op_stats_dict, nth_pctile,
                                            format_numbers)
                # Iterate over a snapshot; empty entries are popped below,
                # and mutating a dict during iteritems() iteration raises
                # a RuntimeError.
                for size_str, size_stats in \
                        list(op_stats_dict['size_stats'].items()):
                    if size_stats:
                        self._compute_req_per_sec(size_stats)
                        self._compute_retry_rate(size_stats)
                        self._compute_latency_stats(size_stats, nth_pctile,
                                                    format_numbers)
                    else:
                        op_stats_dict['size_stats'].pop(size_str)
        # Iterate over a snapshot; empty entries are popped below.
        for size_str, size_stats in list(stats['size_stats'].items()):
            if size_stats:
                self._compute_req_per_sec(size_stats)
                self._compute_retry_rate(size_stats)
                self._compute_latency_stats(size_stats, nth_pctile,
                                            format_numbers)
            else:
                stats['size_stats'].pop(size_str)
        time_series_data = [req_completion_seconds.get(t, 0)
                            for t in range(completion_time_min,
                                           completion_time_max + 1)]
        stats['time_series'] = dict(start=completion_time_min,
                                    start_time=start_time,
                                    stop=completion_time_max,
                                    data=time_series_data)

        return stats
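The time-series bucketing at the end of calculate_scenario_stats is easy to miss: each completion timestamp is truncated to its containing second, counts accumulate in a sparse dict, and the dict is then expanded into a dense per-second list. A minimal standalone sketch of that idea (the timestamps are made up for illustration):

completed_ats = [100.2, 100.9, 101.4, 103.0]  # hypothetical completion times

req_completion_seconds = {}
for completed_at in completed_ats:
    second = int(completed_at)  # truncate to the containing second
    req_completion_seconds[second] = \
        1 + req_completion_seconds.get(second, 0)

t_min = min(req_completion_seconds)
t_max = max(req_completion_seconds)
# Expand the sparse counts into one entry per second, zero-filled.
data = [req_completion_seconds.get(t, 0) for t in range(t_min, t_max + 1)]
# data == [2, 1, 0, 1] for seconds 100..103; second 102 saw no completions.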
Example #8
0
    def __init__(self, scenario_filename, container_count=None,
                 user_count=None, operation_count=None):
        """Initializes the object from a scenario file on disk.

        :scenario_filename: path to a scenario file
        """

        try:
            with open(scenario_filename) as fp:
                self._scenario_data = json.load(fp)
        except Exception:
            logging.exception('Error loading scenario file %r',
                              scenario_filename)
            raise

        # Sanity-check user_count
        if user_count is not None:
            self.user_count = user_count
        else:
            self.user_count = self._scenario_data['user_count']
        if self.user_count < 1:
            raise ValueError('user_count must be >= 1')

        if operation_count is not None:
            self.operation_count = operation_count
        else:
            self.operation_count = self._scenario_data['operation_count']

        self.name = self._scenario_data['name']
        self.container_base = self._scenario_data.get('container_base',
                                                      'ssbench')
        if container_count is not None:
            self.container_count = container_count
        else:
            self.container_count = self._scenario_data.get(
                'container_count', 100)
        self.containers = ['%s_%06d' % (self.container_base, i)
                           for i in xrange(self.container_count)]
        self.container_concurrency = self._scenario_data.get(
            'container_concurrency', 10)

        # Set up sizes
        self.sizes_by_name = OrderedDict()
        for size_data in self._scenario_data['sizes']:
            size_data_copy = copy.deepcopy(size_data)
            self.sizes_by_name[size_data_copy['name']] = size_data_copy
            crud_profile = size_data_copy.get(
                'crud_profile', self._scenario_data['crud_profile'])
            crud_total = sum(crud_profile)
            size_data_copy['crud_pcts'] = [
                float(c) / crud_total * 100 for c in crud_profile]
            # Calculate probability thresholds for each CRUD element for this
            # object size category (defaulting to global crud profile).
            size_data_copy['crud_thresholds'] = [1, 1, 1, 1]
            self._thresholds_for(size_data_copy['crud_thresholds'],
                                 range(4), crud_profile)

        # Calculate probability thresholds for each size (from the
        # initial_files)
        self.bench_size_thresholds = OrderedDict()
        self._thresholds_for(
            self.bench_size_thresholds,
            filter(lambda n: n in self._scenario_data['initial_files'],
                   self.sizes_by_name.keys()),
            self._scenario_data['initial_files'])
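The crud_thresholds filled in by _thresholds_for above are cumulative probabilities, so a single uniform draw on [0, 1) selects a CRUD type in proportion to its weight. A minimal standalone sketch of that cumulative-threshold technique, with an illustrative profile:

import random

crud_profile = [3, 5, 1, 1]  # hypothetical C, R, U, D relative weights
total = float(sum(crud_profile))

thresholds = []
running = 0.0
for weight in crud_profile:
    running += weight / total  # running sum of probabilities
    thresholds.append(running)
# thresholds == [0.3, 0.8, 0.9, 1.0]

r = random.random()  # uniform on [0, 1)
for crud_index, prob in enumerate(thresholds):
    if r < prob:
        break
# crud_index is 0 (Create) with probability 0.3, 1 (Read) with 0.5, etc.;
# the final threshold is 1.0, so the loop always breaks with a valid index.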
Example #9
0
class Scenario(object):
    """Encapsulation of a benchmark "CRUD" scenario."""

    def __init__(self, scenario_filename, container_count=None,
                 user_count=None, operation_count=None):
        """Initializes the object from a scenario file on disk.

        :scenario_filename: path to a scenario file
        """

        try:
            with open(scenario_filename) as fp:
                self._scenario_data = json.load(fp)
        except Exception:
            logging.exception('Error loading scenario file %r',
                              scenario_filename)
            raise

        # Sanity-check user_count
        if user_count is not None:
            self.user_count = user_count
        else:
            self.user_count = self._scenario_data['user_count']
        if self.user_count < 1:
            raise ValueError('user_count must be >= 1')

        if operation_count is not None:
            self.operation_count = operation_count
        else:
            self.operation_count = self._scenario_data['operation_count']

        self.name = self._scenario_data['name']
        self.container_base = self._scenario_data.get('container_base',
                                                      'ssbench')
        if container_count is not None:
            self.container_count = container_count
        else:
            self.container_count = self._scenario_data.get(
                'container_count', 100)
        self.containers = ['%s_%06d' % (self.container_base, i)
                           for i in xrange(self.container_count)]
        self.container_concurrency = self._scenario_data.get(
            'container_concurrency', 10)

        # Set up sizes
        self.sizes_by_name = OrderedDict()
        for size_data in self._scenario_data['sizes']:
            size_data_copy = copy.deepcopy(size_data)
            self.sizes_by_name[size_data_copy['name']] = size_data_copy
            crud_profile = size_data_copy.get(
                'crud_profile', self._scenario_data['crud_profile'])
            crud_total = sum(crud_profile)
            size_data_copy['crud_pcts'] = [
                float(c) / crud_total * 100 for c in crud_profile]
            # Calculate probability thresholds for each CRUD element for this
            # object size category (defaulting to global crud profile).
            size_data_copy['crud_thresholds'] = [1, 1, 1, 1]
            self._thresholds_for(size_data_copy['crud_thresholds'],
                                 range(4), crud_profile)

        # Calculate probability thresholds for each size (from the
        # initial_files)
        self.bench_size_thresholds = OrderedDict()
        self._thresholds_for(
            self.bench_size_thresholds,
            filter(lambda n: n in self._scenario_data['initial_files'],
                   self.sizes_by_name.keys()),
            self._scenario_data['initial_files'])

    @property
    def crud_pcts(self):
        total = sum(self._scenario_data['crud_profile'])
        return [float(c) / total * 100
                for c in self._scenario_data['crud_profile']]

    def _thresholds_for(self, target, indices, data):
        initial_sum = sum(map(lambda i: data[i], indices))
        last = 0
        for idx in indices:
            last = last + float(data[idx]) / initial_sum
            target[idx] = last

    def job(self, size_str, **kwargs):
        job = {'size_str': size_str}
        job.update(kwargs)
        return job

    def create_job(self, size_str, i):
        """
        Creates job dict which will create an object.
        """

        return self.job(size_str,
                        type=ssbench.CREATE_OBJECT,
                        container=random.choice(self.containers),
                        name='%s_%06d' % (size_str, i),
                        size=random.randint(
                            self.sizes_by_name[size_str]['size_min'],
                            self.sizes_by_name[size_str]['size_max']))

    def bench_job(self, size_str, crud_index, i):
        """Creates a benchmark work job dict of a given size and crud "index"
        (where 0 is Create, 1 is Read, etc.).

        :size_str: One of the size strings defined in the scenario file
        :crud_index: An index into the CRUD array (0 is Create, etc.)
        :i: The job index
        :returns: A dictionary representing a benchmark work job
        """

        if crud_index == 0:
            return self.create_job(size_str, i)
        elif crud_index == 1:
            return self.job(size_str, type=ssbench.READ_OBJECT)
        elif crud_index == 2:
            return self.job(
                size_str, type=ssbench.UPDATE_OBJECT,
                size=random.randint(
                    self.sizes_by_name[size_str]['size_min'],
                    self.sizes_by_name[size_str]['size_max']))
        elif crud_index == 3:
            return self.job(size_str, type=ssbench.DELETE_OBJECT)

    def initial_jobs(self):
        """
        Generator for the worker jobs necessary to initialize the cluster
        contents for the scenario.

        :returns: A generator which yields job objects (dicts)
        """

        count_by_size = copy.copy(self._scenario_data['initial_files'])
        index_per_size = dict.fromkeys(count_by_size.iterkeys(), 1)

        yielded = True
        while yielded:
            yielded = False
            for size_str in filter(
                    lambda n: n in self._scenario_data['initial_files'],
                    self.sizes_by_name.keys()):
                if count_by_size[size_str]:
                    yield self.create_job(size_str, index_per_size[size_str])
                    count_by_size[size_str] -= 1
                    index_per_size[size_str] += 1
                    yielded = True

    def bench_jobs(self):
        """
        Generator for the worker jobs necessary to actually run the scenario.

        :returns: A generator which yields job objects (dicts)
        """

        max_index_size = max(self._scenario_data['initial_files'].itervalues())
        for index in xrange(max_index_size + 1,
                            max_index_size + self.operation_count + 1):
            r = random.random()  # uniform on [0, 1)
            for size_str, prob in self.bench_size_thresholds.iteritems():
                if r < prob:
                    this_size_str = size_str
                    break
            # Determine which C/R/U/D type this job will be
            size_crud = self.sizes_by_name[this_size_str]['crud_thresholds']
            r = random.random()  # uniform on [0, 1)
            for crud_index, prob in enumerate(size_crud):
                if r < prob:
                    this_crud_index = crud_index
                    break
            yield self.bench_job(this_size_str, this_crud_index, index)
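A hedged usage sketch for the Scenario class above. The file name and every field value are hypothetical; the dict shape just follows the keys the constructor reads ('name', 'user_count', 'operation_count', 'sizes', 'initial_files', 'crud_profile'), and running it assumes the same module context as the examples (the ssbench constants and Python 2 idioms such as xrange):

import json

scenario_json = {
    'name': 'Example CRUD scenario',  # hypothetical values throughout
    'user_count': 4,
    'operation_count': 100,
    'container_count': 10,
    'sizes': [
        {'name': 'tiny', 'size_min': 1024, 'size_max': 4096},
        {'name': 'small', 'size_min': 100000, 'size_max': 200000},
    ],
    'initial_files': {'tiny': 50, 'small': 10},
    'crud_profile': [6, 2, 1, 1],  # weights for C, R, U, D
}
with open('example_scenario.json', 'w') as fp:
    json.dump(scenario_json, fp)

scenario = Scenario('example_scenario.json')
setup_jobs = list(scenario.initial_jobs())  # 60 create jobs, round-robin
bench_jobs = list(scenario.bench_jobs())    # 100 randomly drawn CRUD jobs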