Exemplo n.º 1
0
    def __init__(self, module_name, class_name, method_name, options,
                 cmd_args):
        """
        Mirrors the initialization of
        :class:`funkload.BenchRunner.BenchRunner`.

        Loads the test case and its configuration, packages the test
        module, resolves the distribution settings (command-line options
        take precedence over the ``[distribute]`` config section), builds
        one :class:`SSHDistributor` per worker and, when live feedback is
        enabled, starts the feedback publisher.

        :param module_name: name of the module containing the test case.
        :param class_name: name of the test case class.
        :param method_name: name of the test method to bench.
        :param options: parsed command-line options.
        :param cmd_args: command-line argument string; ``--is-distributed``
            (and ``--feedback`` when enabled) is appended to it.
        :raises UserWarning: if no log path is given on the command line
            or in the ``[distribute]`` section.
        """
        # store the args. these can be passed to BenchRunner later.
        self.module_name = module_name
        self.class_name = class_name
        self.method_name = method_name
        self.options = options
        self.cmd_args = cmd_args

        self.cmd_args += " --is-distributed"

        if options.feedback:
            self.cmd_args += " --feedback"

        module = load_module(module_name)
        module_file = module.__file__
        self.tarred_tests, self.tarred_testsdir = package_tests(module_file)

        self.remote_res_dir = "/tmp/funkload-bench-sandbox/"

        test = load_unittest(self.module_name, class_name,
                             mmn_encode(method_name, 0, 0, 0), options)

        self.config_path = test._config_path
        self.result_path = test.result_path
        self.class_title = test.conf_get('main', 'title')
        self.class_description = test.conf_get('main', 'description')
        self.test_id = self.method_name
        self.test_url = test.conf_get('main', 'url')
        # materialize the cycles so they can be displayed and re-read
        self.cycles = list(map(int, test.conf_getList('bench', 'cycles')))
        self.duration = test.conf_getInt('bench', 'duration')
        self.startup_delay = test.conf_getFloat('bench', 'startup_delay')
        self.cycle_time = test.conf_getFloat('bench', 'cycle_time')
        self.sleep_time = test.conf_getFloat('bench', 'sleep_time')
        self.sleep_time_min = test.conf_getFloat('bench', 'sleep_time_min')
        self.sleep_time_max = test.conf_getFloat('bench', 'sleep_time_max')
        if test.conf_get('distribute', 'channel_timeout', '', quiet=True):
            self.channel_timeout = test.conf_getFloat(
                'distribute', 'channel_timeout')
        else:
            self.channel_timeout = None
        self.threads = []  # Contains list of ThreadData objects
        self.last_thread_id = -1
        self.thread_creation_lock = threading.Lock()

        if options.python_bin:
            self.python_bin = options.python_bin
        else:
            self.python_bin = test.conf_get(
                'distribute', 'python_bin', 'python')

        if options.distributed_packages:
            self.distributed_packages = options.distributed_packages
        else:
            self.distributed_packages = test.conf_get(
                'distribute', 'packages', '')

        # fall back to an empty description when the method is missing or
        # has no docstring (``__doc__`` is None).
        try:
            desc = getattr(test, self.method_name).__doc__.strip()
        except AttributeError:
            desc = ""
        self.test_description = test.conf_get(self.method_name, 'description',
                                              desc)
        # make a collection output location
        if options.distributed_log_path:
            self.distribution_output = options.distributed_log_path
        elif test.conf_get('distribute', 'log_path', '', quiet=True):
            self.distribution_output = test.conf_get('distribute', 'log_path')
        else:
            raise UserWarning("log_path isn't defined in section [distribute]")

        # check if user has overridden the default funkload distro download
        # location this will be used to download funkload on the worker nodes.
        self.funkload_location = test.conf_get(
            'distribute', 'funkload_location', 'funkload')

        if not os.path.isdir(self.distribution_output):
            os.makedirs(self.distribution_output)

        # check if hosts are in options
        workers = []  # list of keyword-argument dicts for SSHDistributor
        if options.workerlist:
            # each entry is "host", "user@host" or "user:password@host"
            for spec in options.workerlist.split(","):
                # split on the LAST "@" so a password may contain "@"
                cred, at_sign, host = spec.rpartition("@")
                if not at_sign:
                    uname, pwd = None, None
                else:
                    # split on the FIRST ":" so a password may contain ":"
                    uname, colon, pwd = cred.partition(":")
                    if not colon:
                        pwd = None

                workers.append({
                    "name": host,
                    "host": host,
                    "password": pwd,
                    "username": uname,
                    "channel_timeout": self.channel_timeout})
        else:
            hosts = test.conf_get('workers', 'hosts', '', quiet=True).split()
            for host in hosts:
                host = host.strip()
                workers.append({
                    "name": host,
                    "host": test.conf_get(host, "host", host),
                    "password": test.conf_get(host, 'password', ''),
                    "username": test.conf_get(host, 'username', ''),
                    "key_filename": test.conf_get(host, 'ssh_key', ''),
                    "channel_timeout": self.channel_timeout})

        self._workers = [SSHDistributor(**w) for w in workers]
        self._worker_results = {}
        trace(str(self))

        # setup monitoring
        monitor_hosts = []  # list of (name, host, port, descr)
        if not options.is_distributed:
            hosts = test.conf_get('monitor', 'hosts', '', quiet=True).split()
            for host in sorted(hosts):
                name = host
                host = test.conf_get(host, 'host', host.strip())
                monitor_hosts.append((name, host,
                                      test.conf_getInt(name, 'port'),
                                      test.conf_get(name, 'description', '')))
        self.monitor_hosts = monitor_hosts
        # keep the test to use the result logger for monitoring
        # and call setUp/tearDown Cycle
        self.test = test

        # start the feedback receiver
        if LIVE_FEEDBACK and options.feedback:
            trace("* Starting the Feedback Publisher\n")
            self.feedback = FeedbackPublisher(
                endpoint=options.feedback_endpoint or DEFAULT_ENDPOINT,
                pubsub_endpoint=(options.feedback_pubsub_endpoint or
                                 DEFAULT_PUBSUB))
            self.feedback.start()
        else:
            self.feedback = None
Exemplo n.º 2
0
    def __init__(self, module_name, class_name, method_name, options,
                 cmd_args):
        """
        Mirrors the initialization of
        :class:`funkload.BenchRunner.BenchRunner`.

        Loads the test case and its configuration, packages the test
        module, resolves the distribution settings (command-line options
        take precedence over the config file), builds one
        :class:`SSHDistributor` per worker and, when live feedback is
        enabled, starts the feedback publisher.

        :param module_name: name of the module containing the test case.
        :param class_name: name of the test case class.
        :param method_name: name of the test method to bench.
        :param options: parsed command-line options.
        :param cmd_args: list of command-line arguments; distribution
            related arguments are dropped from it and ``--is-distributed``
            (plus ``--feedback`` when enabled) is added.
        :raises UserWarning: if no log path is given on the command line
            or in the ``[distribute]`` section.
        """
        # store the args. these can be passed to BenchRunner later.
        self.module_name = module_name
        self.class_name = class_name
        self.method_name = method_name
        self.options = options

        # drop the distribution specific arguments; build a real list so
        # that it can be appended to whatever ``filter`` would return.
        self.cmd_args = [arg for arg in cmd_args
                         if '--distribute' not in arg and arg != 'discover']
        self.cmd_args.append("--is-distributed")
        # only add --feedback when it was not already given, so the option
        # is not passed twice.
        if options.feedback and "--feedback" not in self.cmd_args:
            self.cmd_args.append("--feedback")

        module = load_module(module_name)
        module_file = module.__file__
        self.tarred_tests, self.tarred_testsdir = package_tests(module_file)

        self.remote_res_dir = "/tmp/funkload-bench-sandbox/"

        test = load_unittest(self.module_name, class_name,
                             mmn_encode(method_name, 0, 0, 0), options)

        self.config_path = test._config_path
        self.result_path = test.result_path
        self.class_title = test.conf_get('main', 'title')
        self.class_description = test.conf_get('main', 'description')
        self.test_id = self.method_name
        self.test_url = test.conf_get('main', 'url')
        # materialize the cycles so they can be displayed and re-read
        self.cycles = list(map(int, test.conf_getList('bench', 'cycles')))
        self.duration = test.conf_getInt('bench', 'duration')
        self.startup_delay = test.conf_getFloat('bench', 'startup_delay')
        self.cycle_time = test.conf_getFloat('bench', 'cycle_time')
        self.sleep_time = test.conf_getFloat('bench', 'sleep_time')
        self.sleep_time_min = test.conf_getFloat('bench', 'sleep_time_min')
        self.sleep_time_max = test.conf_getFloat('bench', 'sleep_time_max')
        if test.conf_get('distribute', 'channel_timeout', '', quiet=True):
            self.channel_timeout = test.conf_getFloat('distribute',
                                                      'channel_timeout')
        else:
            self.channel_timeout = None
        self.threads = []  # Contains list of ThreadData objects
        self.last_thread_id = -1
        self.thread_creation_lock = threading.Lock()

        if options.python_bin:
            self.python_bin = options.python_bin
        else:
            self.python_bin = test.conf_get('distribute', 'python_bin',
                                            'python')

        if options.distributed_packages:
            self.distributed_packages = options.distributed_packages
        else:
            self.distributed_packages = test.conf_get('distribute', 'packages',
                                                      '')

        # fall back to an empty description when the method is missing or
        # has no docstring (``__doc__`` is None).
        try:
            desc = getattr(test, self.method_name).__doc__.strip()
        except AttributeError:
            desc = ""
        self.test_description = test.conf_get(self.method_name, 'description',
                                              desc)
        # make a collection output location
        if options.distributed_log_path:
            self.distribution_output = options.distributed_log_path
        elif test.conf_get('distribute', 'log_path', '', quiet=True):
            self.distribution_output = test.conf_get('distribute', 'log_path')
        else:
            raise UserWarning("log_path isn't defined in section [distribute]")

        # check if user has overridden the default funkload distro download
        # location this will be used to download funkload on the worker nodes.
        self.funkload_location = test.conf_get('distribute',
                                               'funkload_location', 'funkload')

        if not os.path.isdir(self.distribution_output):
            os.makedirs(self.distribution_output)

        # check if hosts are in options
        workers = []  # list of keyword-argument dicts for SSHDistributor
        if options.workerlist:
            # each entry is "host", "user@host" or "user:password@host"
            for spec in options.workerlist.split(","):
                # split on the LAST "@" so a password may contain "@"
                cred, at_sign, host = spec.rpartition("@")
                if not at_sign:
                    uname, pwd = None, None
                else:
                    # split on the FIRST ":" so a password may contain ":"
                    uname, colon, pwd = cred.partition(":")
                    if not colon:
                        pwd = None

                worker = {
                    "name": host.replace(":", "_"),
                    "host": host,
                    "password": pwd,
                    "username": uname,
                    "channel_timeout": self.channel_timeout
                }

                if options.distributed_key_filename:
                    worker['key_filename'] = options.distributed_key_filename

                workers.append(worker)
        else:
            hosts = test.conf_get('workers', 'hosts', '', quiet=True).split()
            for host in hosts:
                host = host.strip()
                # a key given on the command line wins over the per-host one
                if options.distributed_key_filename:
                    key_filename = options.distributed_key_filename
                else:
                    key_filename = test.conf_get(host, 'ssh_key', '')

                workers.append({
                    "name": host.replace(":", "_"),
                    "host": test.conf_get(host, "host", host),
                    "password": test.conf_get(host, 'password', ''),
                    "username": test.conf_get(host, 'username', ''),
                    "key_filename": key_filename,
                    "channel_timeout": self.channel_timeout
                })

        self._workers = [SSHDistributor(**w) for w in workers]
        self._worker_results = {}
        trace(str(self))

        # setup monitoring
        monitor_hosts = []  # list of (name, host, port, descr)
        if not options.is_distributed:
            hosts = test.conf_get('monitor', 'hosts', '', quiet=True).split()
            for host in sorted(hosts):
                name = host
                host = test.conf_get(host, 'host', host.strip())
                monitor_hosts.append(
                    (name, host, test.conf_getInt(name, 'port'),
                     test.conf_get(name, 'description', '')))
        self.monitor_hosts = monitor_hosts
        # keep the test to use the result logger for monitoring
        # and call setUp/tearDown Cycle
        self.test = test

        # start the feedback receiver
        if LIVE_FEEDBACK and options.feedback:
            trace("* Starting the Feedback Publisher\n")
            self.feedback = FeedbackPublisher(
                endpoint=options.feedback_endpoint or DEFAULT_ENDPOINT,
                pubsub_endpoint=options.feedback_pubsub_endpoint
                or DEFAULT_PUBSUB,
                handler=_print_rt)
            self.feedback.start()
        else:
            self.feedback = None
Exemplo n.º 3
0
class DistributionMgr(threading.Thread):
    """
    Interface for use by :mod:`funkload.TestRunner` to distribute
    the bench over multiple machines.
    """
    def __init__(self, module_name, class_name, method_name, options,
                 cmd_args):
        """
        Mirrors the initialization of
        :class:`funkload.BenchRunner.BenchRunner`.

        Loads the test case and its configuration, packages the test
        module, resolves the distribution settings (command-line options
        take precedence over the ``[distribute]`` config section), builds
        one :class:`SSHDistributor` per worker and, when live feedback is
        enabled, starts the feedback publisher.

        :param module_name: name of the module containing the test case.
        :param class_name: name of the test case class.
        :param method_name: name of the test method to bench.
        :param options: parsed command-line options.
        :param cmd_args: command-line argument string; ``--is-distributed``
            (and ``--feedback`` when enabled) is appended to it.
        :raises UserWarning: if no log path is given on the command line
            or in the ``[distribute]`` section.
        """
        # a Thread subclass overriding __init__ must initialize the base
        # class, otherwise start() fails on the half-built thread object.
        threading.Thread.__init__(self)

        # store the args. these can be passed to BenchRunner later.
        self.module_name = module_name
        self.class_name = class_name
        self.method_name = method_name
        self.options = options
        self.cmd_args = cmd_args

        self.cmd_args += " --is-distributed"

        if options.feedback:
            self.cmd_args += " --feedback"

        module = load_module(module_name)
        module_file = module.__file__
        self.tarred_tests, self.tarred_testsdir = package_tests(module_file)

        self.remote_res_dir = "/tmp/funkload-bench-sandbox/"

        test = load_unittest(self.module_name, class_name,
                             mmn_encode(method_name, 0, 0, 0), options)

        self.config_path = test._config_path
        self.result_path = test.result_path
        self.class_title = test.conf_get('main', 'title')
        self.class_description = test.conf_get('main', 'description')
        self.test_id = self.method_name
        self.test_url = test.conf_get('main', 'url')
        # materialize the cycles: __repr__ and write_statistics both
        # read them.
        self.cycles = list(map(int, test.conf_getList('bench', 'cycles')))
        self.duration = test.conf_getInt('bench', 'duration')
        self.startup_delay = test.conf_getFloat('bench', 'startup_delay')
        self.cycle_time = test.conf_getFloat('bench', 'cycle_time')
        self.sleep_time = test.conf_getFloat('bench', 'sleep_time')
        self.sleep_time_min = test.conf_getFloat('bench', 'sleep_time_min')
        self.sleep_time_max = test.conf_getFloat('bench', 'sleep_time_max')
        if test.conf_get('distribute', 'channel_timeout', '', quiet=True):
            self.channel_timeout = test.conf_getFloat(
                'distribute', 'channel_timeout')
        else:
            self.channel_timeout = None
        self.threads = []  # Contains list of ThreadData objects
        self.last_thread_id = -1
        self.thread_creation_lock = threading.Lock()

        if options.python_bin:
            self.python_bin = options.python_bin
        else:
            self.python_bin = test.conf_get(
                'distribute', 'python_bin', 'python')

        if options.distributed_packages:
            self.distributed_packages = options.distributed_packages
        else:
            self.distributed_packages = test.conf_get(
                'distribute', 'packages', '')

        # fall back to an empty description when the method is missing or
        # has no docstring (``__doc__`` is None).
        try:
            desc = getattr(test, self.method_name).__doc__.strip()
        except AttributeError:
            desc = ""
        self.test_description = test.conf_get(self.method_name, 'description',
                                              desc)
        # make a collection output location
        if options.distributed_log_path:
            self.distribution_output = options.distributed_log_path
        elif test.conf_get('distribute', 'log_path', '', quiet=True):
            self.distribution_output = test.conf_get('distribute', 'log_path')
        else:
            raise UserWarning("log_path isn't defined in section [distribute]")

        # check if user has overridden the default funkload distro download
        # location this will be used to download funkload on the worker nodes.
        self.funkload_location = test.conf_get(
            'distribute', 'funkload_location', 'funkload')

        if not os.path.isdir(self.distribution_output):
            os.makedirs(self.distribution_output)

        # check if hosts are in options
        workers = []  # list of keyword-argument dicts for SSHDistributor
        if options.workerlist:
            # each entry is "host", "user@host" or "user:password@host"
            for spec in options.workerlist.split(","):
                # split on the LAST "@" so a password may contain "@"
                cred, at_sign, host = spec.rpartition("@")
                if not at_sign:
                    uname, pwd = None, None
                else:
                    # split on the FIRST ":" so a password may contain ":"
                    uname, colon, pwd = cred.partition(":")
                    if not colon:
                        pwd = None

                workers.append({
                    "name": host,
                    "host": host,
                    "password": pwd,
                    "username": uname,
                    "channel_timeout": self.channel_timeout})
        else:
            hosts = test.conf_get('workers', 'hosts', '', quiet=True).split()
            for host in hosts:
                host = host.strip()
                workers.append({
                    "name": host,
                    "host": test.conf_get(host, "host", host),
                    "password": test.conf_get(host, 'password', ''),
                    "username": test.conf_get(host, 'username', ''),
                    "key_filename": test.conf_get(host, 'ssh_key', ''),
                    "channel_timeout": self.channel_timeout})

        self._workers = [SSHDistributor(**w) for w in workers]
        self._worker_results = {}
        trace(str(self))

        # setup monitoring
        monitor_hosts = []  # list of (name, host, port, descr)
        if not options.is_distributed:
            hosts = test.conf_get('monitor', 'hosts', '', quiet=True).split()
            for host in sorted(hosts):
                name = host
                host = test.conf_get(host, 'host', host.strip())
                monitor_hosts.append((name, host,
                                      test.conf_getInt(name, 'port'),
                                      test.conf_get(name, 'description', '')))
        self.monitor_hosts = monitor_hosts
        # keep the test to use the result logger for monitoring
        # and call setUp/tearDown Cycle
        self.test = test

        # start the feedback receiver
        if LIVE_FEEDBACK and options.feedback:
            trace("* Starting the Feedback Publisher\n")
            self.feedback = FeedbackPublisher(
                endpoint=options.feedback_endpoint or DEFAULT_ENDPOINT,
                pubsub_endpoint=(options.feedback_pubsub_endpoint or
                                 DEFAULT_PUBSUB))
            self.feedback.start()
        else:
            self.feedback = None

    def __repr__(self):
        """Display distributed bench information."""
        text = []
        text.append('=' * 72)
        text.append('Benching %s.%s' % (self.class_name,
                                        self.method_name))
        text.append('=' * 72)
        text.append(self.test_description)
        text.append('-' * 72 + '\n')
        text.append("Configuration")
        text.append("=============\n")
        text.append("* Current time: %s" % datetime.now().isoformat())
        text.append("* Configuration file: %s" % self.config_path)
        text.append("* Distributed output: %s" % self.distribution_output)
        size = os.path.getsize(self.tarred_tests)
        text.append("* Tarred tests: %0.2fMB"%(float(size)/10.0**6))
        text.append("* Server: %s" % self.test_url)
        text.append("* Cycles: %s" % self.cycles)
        text.append("* Cycle duration: %ss" % self.duration)
        text.append("* Sleeptime between request: from %ss to %ss" % (
            self.sleep_time_min, self.sleep_time_max))
        text.append("* Sleeptime between test case: %ss" % self.sleep_time)
        text.append("* Startup delay between thread: %ss" %
                    self.startup_delay)
        text.append("* Channel timeout: %s%s" % (
            self.channel_timeout, "s" if self.channel_timeout else ""))
        text.append("* Workers :%s\n\n" % ",".join(
                                                w.name for w in self._workers))
        return '\n'.join(text)

    def prepare_workers(self, allow_errors=False):
        """
        Initialize the sandboxes in each worker node to prepare for a
        bench run. The additional parameter `allow_errors` will essentially
        make the distinction between ignoring unresponsive/inappropriate
        nodes - or raising an error and failing the entire bench.

        :param allow_errors: when true, workers that are not connected or
            lack the configured Python binary are removed from the pool
            instead of aborting the bench.
        :raises RuntimeError: if a worker is unusable and `allow_errors` is
            false, or if no worker is left in the pool.
        """
        # right, lets figure out if funkload can be setup on each host

        def local_prep_worker(worker):
            """Build the virtualenv sandbox and unpack the tests on one
            worker."""
            remote_res_dir = os.path.join(self.remote_res_dir, worker.name)
            virtual_env = os.path.join(
                remote_res_dir, self.tarred_testsdir)

            # always start from a clean sandbox
            if worker.isdir(virtual_env):
                worker.execute("rm -rf %s" % virtual_env)

            worker.execute("mkdir -p %s" % virtual_env)
            worker.put(
                get_virtualenv_script(),
                os.path.join(remote_res_dir, "virtualenv.py"))

            trace(".")
            worker.execute(
                "%s virtualenv.py %s" % (
                    self.python_bin, self.tarred_testsdir),
                cwdir=remote_res_dir)

            tarball = os.path.split(self.tarred_tests)[1]
            remote_tarball = os.path.join(remote_res_dir, tarball)

            # setup funkload
            cmd = "./bin/easy_install setuptools ez_setup {funkload}".format(
                funkload=self.funkload_location)

            if self.distributed_packages:
                cmd += " %s" % self.distributed_packages

            worker.execute(cmd, cwdir=virtual_env)

            # unpackage tests.
            worker.put(self.tarred_tests, remote_tarball)
            worker.execute(
                "tar -xvf %s" % tarball,
                cwdir=remote_res_dir)
            worker.execute("rm %s" % remote_tarball)

            # workaround for https://github.com/pypa/virtualenv/issues/330
            worker.execute("rm lib64", cwdir=virtual_env)
            worker.execute("ln -s lib lib64", cwdir=virtual_env)

        threads = []
        trace("* Preparing sandboxes for %d workers." % len(self._workers))
        # iterate over a copy: unusable workers are removed from the pool
        for worker in list(self._workers):
            if not worker.connected:
                if not allow_errors:
                    raise RuntimeError(
                        "%s is not contactable with error %s" % (
                            worker.name, worker.error))
                trace("%s is not connected, removing from pool.\n" %
                      worker.name)
                self._workers.remove(worker)
                continue

            # Verify that the Python binary is available
            which_python = "test -x `which %s 2>&1 > /dev/null` && echo true" \
                    % (self.python_bin)
            out, _err = worker.execute(which_python)

            if out.strip() == "true":
                threads.append(threading.Thread(
                    target=local_prep_worker,
                    args=(worker,)))
            elif allow_errors:
                # the whole message must be parenthesized: ``%`` binds
                # tighter than ``+`` and would otherwise be applied to the
                # second literal only, raising a TypeError.
                trace(("Cannot find Python binary at path `%s` on %s, "
                       "removing from pool") % (
                           self.python_bin, worker.name))
                self._workers.remove(worker)
            else:
                # the worker is reachable here, so report the real problem
                raise RuntimeError(
                    "Cannot find Python binary at path `%s` on %s" % (
                        self.python_bin, worker.name))

        # prepare all the usable workers in parallel
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        trace("\n")
        if not self._workers:
            raise RuntimeError("no workers available for distribution")

    def abort(self):
        for worker in self._workers:
            worker.die()

    def run(self):
        """
        Run the bench on every worker and gather the outputs.

        Starts the monitors, launches ``bin/fl-run-bench`` in each worker's
        sandbox, waits for all of them, stores each worker's standard
        output in ``self._worker_results`` (echoing any error output), then
        stops the monitors and correlates the statistics.
        """
        trace("* Starting %d workers" % len(self._workers))
        self.startMonitors()

        handles = []
        for node in self._workers:
            sandbox = os.path.join(self.remote_res_dir, node.name)
            handle = node.threaded_execute(
                'bin/fl-run-bench %s' % self.cmd_args,
                cwdir=os.path.join(sandbox, self.tarred_testsdir))
            trace(".")
            handles.append(handle)
        trace("\n")

        # wait for every remote bench to complete
        for handle in handles:
            handle.join()
        trace("\n")

        for handle, node in zip(handles, self._workers):
            self._worker_results[node] = handle.output.read()
            trace("* [%s] returned\n" % node.name)
            stderr_text = handle.err.read()
            if stderr_text:
                # echo the non-blank error lines, prefixed by the worker name
                report = ["  [%s]: %s" % (node.name, line)
                          for line in stderr_text.split("\n")
                          if line.strip()]
                trace("\n".join(report))
            trace("\n")

        self.stopMonitors()
        self.correlate_statistics()

    def final_collect(self):
        expr = re.compile("Log\s+xml:\s+(.*?)\n")
        for worker, results in self._worker_results.items():
            res = expr.findall(results)
            if res:
                remote_file = res[0]
                filename = os.path.split(remote_file)[1]
                local_file = os.path.join(
                    self.distribution_output, "%s-%s" % (
                        worker.name, filename))
                worker.get(remote_file, local_file)
                trace("* Received bench log from [%s] into %s\n" % (
                    worker.name, local_file))

    def startMonitors(self):
        """Start monitoring on hosts list."""
        if not self.monitor_hosts:
            return
        monitor_hosts = []
        monitor_key = "%s:0:0" % self.method_name
        for (name, host, port, desc) in self.monitor_hosts:
            trace("* Start monitoring %s: ..." % name)
            server = ServerProxy("http://%s:%s" % (host, port))
            try:
                server.startRecord(monitor_key)
            except SocketError:
                trace(' failed, server is down.\n')
            else:
                trace(' done.\n')
                monitor_hosts.append((name, host, port, desc))
        self.monitor_hosts = monitor_hosts

    def stopMonitors(self):
        """Stop monitoring and save xml result."""
        if not self.monitor_hosts:
            return
        monitor_key = "%s:0:0" % self.method_name
        successful_results = []
        for (name, host, port, desc) in self.monitor_hosts:
            trace('* Stop monitoring %s: ' % host)
            server = ServerProxy("http://%s:%s" % (host, port))
            try:
                server.stopRecord(monitor_key)
                successful_results.append(server.getXmlResult(monitor_key))
            except SocketError:
                trace(' failed, server is down.\n')
            else:
                trace(' done.\n')

        self.write_statistics(successful_results)
        if self.feedback is not None:
            self.feedback.close()

    def write_statistics(self, successful_results):
        """
        Write the distributed stats to a file in the output dir.

        The file ``stats.xml`` holds one ``<config>`` element per bench
        setting (plus one per monitor host, keyed by the monitor name)
        followed by the xml results of the monitors.

        :param successful_results: xml strings returned by the monitors.
        """
        # local import: only this method needs it
        from xml.sax.saxutils import quoteattr

        path = os.path.join(self.distribution_output, "stats.xml")
        config = {'id': self.test_id,
                  'description': self.test_description,
                  'class_title': self.class_title,
                  'class_description': self.class_description,
                  'module': self.module_name,
                  'class': self.class_name,
                  'method': self.method_name,
                  'cycles': self.cycles,
                  'duration': self.duration,
                  'sleep_time': self.sleep_time,
                  'startup_delay': self.startup_delay,
                  'sleep_time_min': self.sleep_time_min,
                  'sleep_time_max': self.sleep_time_max,
                  'cycle_time': self.cycle_time,
                  'configuration_file': self.config_path,
                  'server_url': self.test_url,
                  'log_xml': self.result_path,
                  'python_version': platform.python_version()}

        for (name, host, port, desc) in self.monitor_hosts:
            config[name] = desc

        with open(path, "w+") as fd:
            fd.write('<funkload version="{version}" time="{time}">\n'.format(
                version=get_version(), time=time.time()))
            for key, value in config.items():
                # Write out the config values. quoteattr() escapes and
                # quotes them: titles, descriptions and urls may contain
                # quotes, "&" or "<", which would otherwise produce a file
                # that cannot be parsed back.
                fd.write('<config key={key} value={value}/>\n'.format(
                    key=quoteattr(str(key)), value=quoteattr(str(value))))
            for xml in successful_results:
                fd.write(xml)
                fd.write("\n")

            fd.write("</funkload>\n")

    def _calculate_time_skew(self, results, stats):
        if not results or not stats:
            return 1

        def min_time(vals):
            keyfunc = lambda elem: float(elem.attrib['time'])
            return keyfunc(min(vals, key=keyfunc))

        results_min = min_time(results)
        monitor_min = min_time(stats)

        return results_min / monitor_min

    def _calculate_results_ranges(self, results):
        seen = []
        times = {}
        for element in results:
            cycle = int(element.attrib['cycle'])
            if cycle not in seen:
                seen.append(cycle)

                cvus = int(element.attrib['cvus'])
                start_time = float(element.attrib['time'])
                times[start_time] = (cycle, cvus)

        return times

    def correlate_statistics(self):
        result_path = None
        if not self.monitor_hosts:
            return
        for worker, results in self._worker_results.items():
            files = glob("%s/%s-*.xml" % (self.distribution_output,
                                          worker.name))
            if files:
                result_path = files[0]
                break

        if not result_path:
            trace("* No output files found; unable to correlate stats.\n")
            return

        # Calculate the ratio between results and monitoring
        results_tree = ElementTree(file=result_path)
        stats_path = os.path.join(self.distribution_output, "stats.xml")
        stats_tree = ElementTree(file=stats_path)

        results = results_tree.findall("testResult")
        stats = stats_tree.findall("monitor")
        ratio = self._calculate_time_skew(results, stats)

        # Now that we have the ratio, we can calculate the sessions!
        times = self._calculate_results_ranges(results)
        times_desc = sorted(times.keys(), reverse=True)

        # Now, parse the stats tree and update values
        def find_range(start_time):
            for time_ in times_desc:
                if start_time > time_:
                    return times[time_]
            else:
                return times[time_]

        for stat in stats:
            adj_time = float(stat.attrib['time']) * ratio
            cycle, cvus = find_range(adj_time)
            key, cycle_, cvus_ = stat.attrib['key'].partition(':')
            stat.attrib['key'] = "%s:%d:%d" % (key, cycle, cvus)

        stats_tree.write(stats_path)
Exemplo n.º 4
0
class DistributionMgr(threading.Thread):
    """
    Interface for use by :mod:`funkload.TestRunner` to distribute
    the bench over multiple machines.
    """
    def __init__(self, module_name, class_name, method_name, options,
                 cmd_args):
        """
        mirrors the initialization of :class:`funkload.BenchRunner.BenchRunner`

        Loads the test case to read its configuration, packages the tests,
        builds the SSH worker pool and the list of monitor hosts and, when
        live feedback is enabled, starts the feedback publisher.

        :param module_name: name of the module holding the test case.
        :param class_name: name of the test case class.
        :param method_name: name of the test method to bench.
        :param options: parsed command line options.
        :param cmd_args: list of command line arguments; it is filtered and
            forwarded to ``bin/fl-run-bench`` on every worker.
        :raises UserWarning: when no distributed log path is given either in
            the options or in the ``[distribute]`` section.
        """
        # The base class must be initialised when __init__ is overridden,
        # otherwise start()/join() cannot be used on this object.
        threading.Thread.__init__(self)

        # store the args. these can be passed to BenchRunner later.
        self.module_name = module_name
        self.class_name = class_name
        self.method_name = method_name
        self.options = options

        # Drop the arguments that only make sense on the distributing side.
        # A list comprehension is used because ``filter`` only returns a
        # list (which is appended to below) on Python 2.
        self.cmd_args = [arg for arg in cmd_args
                         if '--distribute' not in arg and arg != 'discover']
        self.cmd_args.append("--is-distributed")
        # the caller may already have passed --feedback: do not repeat it
        if options.feedback and "--feedback" not in self.cmd_args:
            self.cmd_args.append("--feedback")

        module = load_module(module_name)
        module_file = module.__file__
        self.tarred_tests, self.tarred_testsdir = package_tests(module_file)

        self.remote_res_dir = "/tmp/funkload-bench-sandbox/"

        test = load_unittest(self.module_name, class_name,
                             mmn_encode(method_name, 0, 0, 0), options)

        self.config_path = test._config_path
        self.result_path = test.result_path
        self.class_title = test.conf_get('main', 'title')
        self.class_description = test.conf_get('main', 'description')
        self.test_id = self.method_name
        self.test_url = test.conf_get('main', 'url')
        # a real list, so it can be printed and iterated more than once
        self.cycles = [int(cycle)
                       for cycle in test.conf_getList('bench', 'cycles')]
        self.duration = test.conf_getInt('bench', 'duration')
        self.startup_delay = test.conf_getFloat('bench', 'startup_delay')
        self.cycle_time = test.conf_getFloat('bench', 'cycle_time')
        self.sleep_time = test.conf_getFloat('bench', 'sleep_time')
        self.sleep_time_min = test.conf_getFloat('bench', 'sleep_time_min')
        self.sleep_time_max = test.conf_getFloat('bench', 'sleep_time_max')
        if test.conf_get('distribute', 'channel_timeout', '', quiet=True):
            self.channel_timeout = test.conf_getFloat('distribute',
                                                      'channel_timeout')
        else:
            self.channel_timeout = None
        self.threads = []  # Contains list of ThreadData objects
        self.last_thread_id = -1
        self.thread_creation_lock = threading.Lock()

        # command line options take precedence over the configuration file
        if options.python_bin:
            self.python_bin = options.python_bin
        else:
            self.python_bin = test.conf_get('distribute', 'python_bin',
                                            'python')

        if options.distributed_packages:
            self.distributed_packages = options.distributed_packages
        else:
            self.distributed_packages = test.conf_get('distribute', 'packages',
                                                      '')

        try:
            desc = getattr(test, self.method_name).__doc__.strip()
        except AttributeError:
            # the method does not exist or has no docstring
            desc = ""
        self.test_description = test.conf_get(self.method_name, 'description',
                                              desc)
        # make a collection output location
        if options.distributed_log_path:
            self.distribution_output = options.distributed_log_path
        elif test.conf_get('distribute', 'log_path', '', quiet=True):
            self.distribution_output = test.conf_get('distribute', 'log_path')
        else:
            raise UserWarning("log_path isn't defined in section [distribute]")

        # check if user has overridden the default funkload distro download
        # location this will be used to download funkload on the worker nodes.
        self.funkload_location = test.conf_get('distribute',
                                               'funkload_location', 'funkload')

        if not os.path.isdir(self.distribution_output):
            os.makedirs(self.distribution_output)

        # check if hosts are in options
        workers = []  # list of keyword dicts for SSHDistributor
        if options.workerlist:
            for spec in options.workerlist.split(","):
                # ``[user[:password]@]host``: split on the last "@" and on
                # the first ":" of the credentials, so that a password may
                # contain either character and the host may carry a port.
                cred, at_sign, host = spec.rpartition("@")
                if not at_sign:
                    uname, pwd = None, None
                else:
                    uname, colon, pwd = cred.partition(":")
                    if not colon:
                        pwd = None

                worker = {
                    "name": host.replace(":", "_"),
                    "host": host,
                    "password": pwd,
                    "username": uname,
                    "channel_timeout": self.channel_timeout
                }

                if options.distributed_key_filename:
                    worker['key_filename'] = options.distributed_key_filename

                workers.append(worker)
        else:
            hosts = test.conf_get('workers', 'hosts', '', quiet=True).split()
            for host in hosts:
                host = host.strip()
                if options.distributed_key_filename:
                    key_filename = options.distributed_key_filename
                else:
                    key_filename = test.conf_get(host, 'ssh_key', '')

                workers.append({
                    "name": host.replace(":", "_"),
                    "host": test.conf_get(host, "host", host),
                    "password": test.conf_get(host, 'password', ''),
                    "username": test.conf_get(host, 'username', ''),
                    "key_filename": key_filename,
                    "channel_timeout": self.channel_timeout
                })

        self._workers = [SSHDistributor(**w) for w in workers]
        self._worker_results = {}
        trace(str(self))

        # setup monitoring
        monitor_hosts = []  # list of (name, host, port, descr)
        if not options.is_distributed:
            hosts = test.conf_get('monitor', 'hosts', '', quiet=True).split()
            for host in sorted(hosts):
                name = host
                host = test.conf_get(host, 'host', host.strip())
                monitor_hosts.append(
                    (name, host, test.conf_getInt(name, 'port'),
                     test.conf_get(name, 'description', '')))
        self.monitor_hosts = monitor_hosts
        # keep the test to use the result logger for monitoring
        # and call setUp/tearDown Cycle
        self.test = test

        # start the feedback receiver
        if LIVE_FEEDBACK and options.feedback:
            trace("* Starting the Feedback Publisher\n")
            self.feedback = FeedbackPublisher(
                endpoint=options.feedback_endpoint or DEFAULT_ENDPOINT,
                pubsub_endpoint=options.feedback_pubsub_endpoint
                or DEFAULT_PUBSUB,
                handler=_print_rt)
            self.feedback.start()
        else:
            self.feedback = None
    def __repr__(self):
        """Display distributed bench information."""
        text = []
        text.append('=' * 72)
        text.append('Benching %s.%s' % (self.class_name, self.method_name))
        text.append('=' * 72)
        text.append(self.test_description)
        text.append('-' * 72 + '\n')
        text.append("Configuration")
        text.append("=============\n")
        text.append("* Current time: %s" % datetime.now().isoformat())
        text.append("* Configuration file: %s" % self.config_path)
        text.append("* Distributed output: %s" % self.distribution_output)
        size = os.path.getsize(self.tarred_tests)
        text.append("* Tarred tests: %0.2fMB" % (float(size) / 10.0**6))
        text.append("* Server: %s" % self.test_url)
        text.append("* Cycles: %s" % self.cycles)
        text.append("* Cycle duration: %ss" % self.duration)
        text.append("* Sleeptime between request: from %ss to %ss" %
                    (self.sleep_time_min, self.sleep_time_max))
        text.append("* Sleeptime between test case: %ss" % self.sleep_time)
        text.append("* Startup delay between thread: %ss" % self.startup_delay)
        text.append(
            "* Channel timeout: %s%s" %
            (self.channel_timeout, "s" if self.channel_timeout else ""))
        text.append("* Workers :%s\n\n" % ",".join(w.name
                                                   for w in self._workers))
        return '\n'.join(text)

    def prepare_workers(self, allow_errors=False):
        """
        Initialize the sandboxes in each worker node to prepare for a
        bench run. The additional parameter `allow_errors` will essentially
        make the distinction between ignoring unresponsive/inappropriate
        nodes - or raising an error and failing the entire bench.

        :param allow_errors: when true, workers that are not connected or
            where the Python binary check fails are removed from the pool
            instead of aborting.
        :raises RuntimeError: when a worker is unusable and `allow_errors`
            is false, or when no worker is left in the pool.
        """

        # right, lets figure out if funkload can be setup on each host

        def local_prep_worker(worker):
            """Build the sandbox on ``worker`` and unpack the tests in it."""
            remote_res_dir = os.path.join(self.remote_res_dir, worker.name)
            virtual_env = os.path.join(remote_res_dir, self.tarred_testsdir)

            # always start from an empty sandbox directory
            if worker.isdir(virtual_env):
                worker.execute("rm -rf %s" % virtual_env)

            worker.execute("mkdir -p %s" % virtual_env)
            worker.put(get_virtualenv_script(),
                       os.path.join(remote_res_dir, "tmpvenv.py"))

            trace(".")
            worker.execute("%s tmpvenv.py %s" % (self.python_bin, virtual_env),
                           cwdir=remote_res_dir)

            tarball = os.path.split(self.tarred_tests)[1]
            remote_tarball = os.path.join(remote_res_dir, tarball)

            # setup funkload
            cmd = "./bin/easy_install setuptools ez_setup {funkload}".format(
                funkload=self.funkload_location)

            if self.distributed_packages:
                cmd += " %s" % self.distributed_packages

            worker.execute(cmd, cwdir=virtual_env)

            # unpackage tests.
            worker.put(self.tarred_tests, remote_tarball)
            worker.execute("tar -xvf %s" % tarball, cwdir=remote_res_dir)
            worker.execute("rm %s" % remote_tarball)

            # workaround for https://github.com/pypa/virtualenv/issues/330
            worker.execute("rm lib64", cwdir=virtual_env)
            worker.execute("ln -s lib lib64", cwdir=virtual_env)

        threads = []
        trace("* Preparing sandboxes for %d workers." % len(self._workers))
        # iterate over a copy: unusable workers are removed from the pool
        for worker in list(self._workers):
            if not worker.connected:
                if allow_errors:
                    trace("%s is not connected, removing from pool.\n" %
                          worker.name)
                    self._workers.remove(worker)
                    continue
                else:
                    raise RuntimeError("%s is not contactable with error %s" %
                                       (worker.name, worker.error))

            # Verify that the Python binary is available
            # NOTE(review): the stdout of `which` is sent to /dev/null before
            # the backticks can capture it, so this check looks like it
            # depends on what `which` prints on stderr -- confirm on the
            # target systems.
            which_python = "test -x `which %s 2>&1 > /dev/null` && echo true" \
                % (self.python_bin)
            out, err = worker.execute(which_python)

            if out.strip() == "true":
                threads.append(
                    threading.Thread(target=local_prep_worker,
                                     args=(worker, )))
            elif allow_errors:
                # The whole message must be formatted: ``%`` binds tighter
                # than ``+``, so the literals are joined before formatting.
                trace(("Cannot find Python binary at path `%s` on %s, "
                       "removing from pool") %
                      (self.python_bin, worker.name))
                self._workers.remove(worker)
            else:
                raise RuntimeError(
                    "Cannot find Python binary at path `%s` on %s" %
                    (self.python_bin, worker.name))

        # prepare all the usable workers in parallel and wait for them
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        trace("\n")
        if not self._workers:
            raise RuntimeError("no workers available for distribution")

    def abort(self):
        for worker in self._workers:
            worker.die()

    def run(self):
        """
        Launch the bench on every worker and wait until all have returned.

        Monitoring is started before the workers are launched and stopped
        once they are all done. The standard output of each worker is kept
        in ``self._worker_results``, its error output is traced, and the
        monitoring statistics are finally correlated with the results.
        """
        handles = []
        trace("* Starting %d workers" % len(self._workers))

        self.startMonitors()
        for node in self._workers:
            sandbox = os.path.join(self.remote_res_dir, node.name)
            venv_dir = os.path.join(sandbox, self.tarred_testsdir)
            command = 'bin/fl-run-bench ' + ' '.join(self.cmd_args)
            handle = node.threaded_execute(command, cwdir=venv_dir)
            trace(".")
            handles.append(handle)

        trace("\n")

        # poll every five seconds for as long as a remote command is running
        while any([handle.is_alive() for handle in handles]):
            time.sleep(5.)

        trace("\n")

        for handle, node in zip(handles, self._workers):
            self._worker_results[node] = handle.output.read()
            trace("* [%s] returned\n" % node.name)
            stderr_text = handle.err.read()
            if stderr_text:
                # prefix each non blank error line with the worker name
                prefixed = ["  [%s]: %s" % (node.name, line)
                            for line in stderr_text.split("\n")
                            if line.strip()]
                trace("\n".join(prefixed))
            trace("\n")

        self.stopMonitors()
        self.correlate_statistics()

    def final_collect(self):
        expr = re.compile("Log\s+xml:\s+(.*?)\n")
        for worker, results in self._worker_results.items():
            res = expr.findall(results)
            if res:
                remote_file = res[0]
                filename = os.path.split(remote_file)[1]
                local_file = os.path.join(self.distribution_output,
                                          "%s-%s" % (worker.name, filename))
                if os.access(local_file, os.F_OK):
                    os.rename(local_file,
                              local_file + '.bak-' + str(int(time.time())))
                worker.get(remote_file, local_file)
                trace("* Received bench log from [%s] into %s\n" %
                      (worker.name, local_file))

    def startMonitors(self):
        """Start monitoring on hosts list."""
        if not self.monitor_hosts:
            return
        monitor_hosts = []
        monitor_key = "%s:0:0" % self.method_name
        for (name, host, port, desc) in self.monitor_hosts:
            trace("* Start monitoring %s: ..." % name)
            server = ServerProxy("http://%s:%s" % (host, port))
            try:
                server.startRecord(monitor_key)
            except SocketError:
                trace(' failed, server is down.\n')
            else:
                trace(' done.\n')
                monitor_hosts.append((name, host, port, desc))
        self.monitor_hosts = monitor_hosts

    def stopMonitors(self):
        """Stop monitoring and save xml result."""
        if not self.monitor_hosts:
            return
        monitor_key = "%s:0:0" % self.method_name
        successful_results = []
        for (name, host, port, desc) in self.monitor_hosts:
            trace('* Stop monitoring %s: ' % host)
            server = ServerProxy("http://%s:%s" % (host, port))
            try:
                server.stopRecord(monitor_key)
                successful_results.append(server.getXmlResult(monitor_key))
            except SocketError:
                trace(' failed, server is down.\n')
            else:
                trace(' done.\n')

        self.write_statistics(successful_results)
        if self.feedback is not None:
            self.feedback.close()

    def write_statistics(self, successful_results):
        """
        Write the distributed stats to a file in the output dir.

        ``stats.xml`` is made of a ``funkload`` root element holding one
        ``config`` element per bench setting (plus one per monitor host,
        keyed by its name) followed by the given monitoring results. An
        existing ``stats.xml`` is first renamed with a ``.bak-<timestamp>``
        suffix.

        :param successful_results: xml fragments, written as they are.
        """
        # local import, only needed to build well formed attributes
        from xml.sax.saxutils import quoteattr

        path = os.path.join(self.distribution_output, "stats.xml")
        if os.access(path, os.F_OK):
            os.rename(path, path + '.bak-' + str(int(time.time())))
        config = {
            'id': self.test_id,
            'description': self.test_description,
            'class_title': self.class_title,
            'class_description': self.class_description,
            'module': self.module_name,
            'class': self.class_name,
            'method': self.method_name,
            'cycles': self.cycles,
            'duration': self.duration,
            'sleep_time': self.sleep_time,
            'startup_delay': self.startup_delay,
            'sleep_time_min': self.sleep_time_min,
            'sleep_time_max': self.sleep_time_max,
            'cycle_time': self.cycle_time,
            'configuration_file': self.config_path,
            'server_url': self.test_url,
            'log_xml': self.result_path,
            'python_version': platform.python_version()
        }

        for (name, host, port, desc) in self.monitor_hosts:
            config[name] = desc

        with open(path, "w+") as fd:
            fd.write('<funkload version="{version}" time="{time}">\n'.format(
                version=get_version(), time=time.time()))
            for key, value in config.items():
                # Write out the config values. quoteattr() adds the quotes
                # and escapes the markup characters, so that free text such
                # as a description cannot produce an unparsable file.
                fd.write('<config key={key} value={value}/>\n'.format(
                    key=quoteattr('{0}'.format(key)),
                    value=quoteattr('{0}'.format(value))))
            for xml in successful_results:
                fd.write(xml)
                fd.write("\n")

            fd.write("</funkload>\n")

    def _calculate_time_skew(self, results, stats):
        if not results or not stats:
            return 1

        def min_time(vals):
            keyfunc = lambda elem: float(elem.attrib['time'])
            return keyfunc(min(vals, key=keyfunc))

        results_min = min_time(results)
        monitor_min = min_time(stats)

        return results_min / monitor_min

    def _calculate_results_ranges(self, results):
        seen = []
        times = {}
        for element in results:
            cycle = int(element.attrib['cycle'])
            if cycle not in seen:
                seen.append(cycle)

                cvus = int(element.attrib['cvus'])
                start_time = float(element.attrib['time'])
                times[start_time] = (cycle, cvus)

        return times

    def correlate_statistics(self):
        result_path = None
        if not self.monitor_hosts:
            return
        for worker, results in self._worker_results.items():
            files = glob("%s/%s-*.xml" %
                         (self.distribution_output, worker.name))
            if files:
                result_path = files[0]
                break

        if not result_path:
            trace("* No output files found; unable to correlate stats.\n")
            return

        # Calculate the ratio between results and monitoring
        results_tree = ElementTree(file=result_path)
        stats_path = os.path.join(self.distribution_output, "stats.xml")
        stats_tree = ElementTree(file=stats_path)

        results = results_tree.findall("testResult")
        stats = stats_tree.findall("monitor")
        ratio = self._calculate_time_skew(results, stats)

        # Now that we have the ratio, we can calculate the sessions!
        times = self._calculate_results_ranges(results)
        times_desc = sorted(times.keys(), reverse=True)

        # Now, parse the stats tree and update values
        def find_range(start_time):
            for time_ in times_desc:
                if start_time > time_:
                    return times[time_]
            else:
                return times[time_]

        for stat in stats:
            adj_time = float(stat.attrib['time']) * ratio
            cycle, cvus = find_range(adj_time)
            key, cycle_, cvus_ = stat.attrib['key'].partition(':')
            stat.attrib['key'] = "%s:%d:%d" % (key, cycle, cvus)

        stats_tree.write(stats_path)