Example #1
    def start_cmd(self,
                  service_class,
                  service_run_func=DEFAULT_SERVICE_RUN_FUNC):

        if isinstance(service_run_func, string_types):
            service_run_func = utils.get_class_from_qualname(service_run_func)

        if not isinstance(service_class, string_types):
            service_qualname = utils.get_class_qualname(service_class)
        else:
            service_qualname = service_class

        proc = multiprocessing.Process(target=service_run_func,
                                       args=(
                                           service_qualname,
                                           self.service_port,
                                       ))
        # NOTE: even daemonized it will still share stdout/err with this proc.
        # That is a good thing imho, since we will see tracebacks. SSH-based
        # services will not enjoy the same advantage by default.
        proc.daemon = True
        proc.start()

        # TODO: make sure we didn't die right away? But how long should we wait
        # to find out? And what if we die while executing a handler? A
        # ping-response pattern to check liveness seems like crazy overkill for
        # a lib like this, but it isn't hard.
        #
        # If the user set up remote logging, they will probably see the
        # traceback for a death during execution. But they won't see a start-up
        # death, since they won't have called in to set up remote logging yet.
        # Up to them to do logging to a file? That seems fair for now.

        self._proc = proc
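
For context, here is a minimal, self-contained sketch of the daemonized-process pattern used above. The ``worker_main`` function and the "my_pkg.MyService" name are illustrative stand-ins, not part of the library; the point is that a daemonized child dies with the parent while still sharing stdout/stderr, so its tracebacks stay visible.

import multiprocessing
import time


def worker_main(service_qualname, port):
    # Stand-in for the library's service-run function: the real one would
    # resolve `service_qualname` to a class and serve it on `port`.
    print("serving %s on port %d" % (service_qualname, port))
    time.sleep(1)


if __name__ == '__main__':
    proc = multiprocessing.Process(target=worker_main,
                                   args=("my_pkg.MyService", 6000))
    # The daemon flag ties the child's lifetime to this process; stdout/stderr
    # are still shared, so tracebacks from the child show up here.
    proc.daemon = True
    proc.start()
    proc.join()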
Example #2
    def default(self, obj):
        if hasattr(obj, 'to_json'):
            qualname = get_class_qualname(obj.__class__)
            qualname = _DEFAULT_MAGIC + qualname
            return {qualname: obj.to_json()}

        raise TypeError("Cannot figure out how to JSONify: %s" % repr(obj))
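
The ``default`` hook above relies on objects exposing a ``to_json`` method. The sketch below shows that duck-typing pattern with the standard ``json`` module; the ``Point`` class, the ``SketchEncoder`` name, and the plain class-name key are illustrative only, whereas the example's encoder prefixes the qualname with a magic marker so a matching decoder can rebuild the object.

import json


class Point(object):
    # Any object exposing to_json() can be handled by an encoder like the
    # one in the example above.
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def to_json(self):
        return {'x': self.x, 'y': self.y}


class SketchEncoder(json.JSONEncoder):
    # Illustrative stand-in for the example's encoder, which keys the payload
    # on a magic-prefixed qualname instead of the bare class name.
    def default(self, obj):
        if hasattr(obj, 'to_json'):
            return {obj.__class__.__name__: obj.to_json()}
        return json.JSONEncoder.default(self, obj)


print(json.dumps(Point(1, 2), cls=SketchEncoder))
# {"Point": {"x": 1, "y": 2}}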
Example #3
def start_remote_services(service_class):
    """
    Simplest method to start some services remotely.

    :param class service_class: a descendant of :class:`CoordinatorService`
    :raises RuntimeError: on various issues with service start up
    :return: A ServiceGroup wrapping the started services
    """
    service_qualname = utils.get_class_qualname(service_class)
    return ServiceGroup(service_qualname)
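
A hedged usage sketch for ``start_remote_services``: ``MyService`` is a hypothetical ``CoordinatorService`` descendant with its handlers omitted, and the ``shutdown(hard=True)`` teardown mirrors the call used in the ``run_tests`` example further down.

class MyService(CoordinatorService):
    # Hypothetical service; the handlers it would define depend entirely on
    # what you want the remote workers to do.
    pass


group = start_remote_services(MyService)
try:
    # ... dispatch work through the returned ServiceGroup ...
    pass
finally:
    group.shutdown(hard=True)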
Example #4
    def start_cmd(self,
                  service_class,
                  service_run_func=bootstrap.DEFAULT_SERVICE_RUN_FUNC):

        if isinstance(service_run_func, string_types):
            service_run_func = utils.get_class_from_qualname(service_run_func)
        srf_module = service_run_func.__module__
        service_run_func = utils.get_class_qualname(service_run_func)

        if not isinstance(service_class, string_types):
            service_qualname = utils.get_class_qualname(service_class)
        else:
            service_qualname = service_class
            service_class = utils.get_class_from_qualname(service_class)
        sc_module = service_class.__module__

        ci_in = {}
        username = None
        password = None
        rsakey = None

        service_info = self._service_info
        for key, value in service_info.items():
            if key == 'username_map':
                if value and self.service_ip in value:
                    username = value[self.service_ip]
            elif key == 'username_override':
                if username is None:
                    username = value
            elif key == 'password_map':
                if value and self.service_ip in value:
                    password = value[self.service_ip]
            elif key == 'password_override':
                if password is None:
                    password = value
            elif key == 'rsakey_map':
                if value and self.service_ip in value:
                    rsakey = value[self.service_ip]
            elif key == 'rsakey_override':
                if rsakey is None:
                    rsakey = value
            elif key not in ('password', 'username', 'rsakey'):
                ci_in[key] = value

        if ('username' in service_info
                and service_info['username'] is not None):
            username = service_info['username']
        if ('password' in service_info
                and service_info['password'] is not None):
            password = service_info['password']
        if ('rsakey' in service_info and service_info['rsakey'] is not None):
            rsakey = service_info['rsakey']

        self._ssh_spawn(self.service_ip,
                        username=username,
                        password=password,
                        rsakey=rsakey,
                        **ci_in)

        cmd = ("python -c 'import %s; import %s; %s(\"%s\", %d)'" %
               (srf_module, sc_module, service_run_func, service_qualname,
                self.service_port))
        self.cmd(cmd)
        logger.debug("Spawned SSH handles for services")
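
To make the string interpolation at the end concrete, here is the shape of the generated remote command for hypothetical module and class names (none of these names come from the library):

srf_module = "my_pkg.bootstrap"                    # service_run_func.__module__
sc_module = "my_pkg.services"                      # service_class.__module__
service_run_func = "my_pkg.bootstrap.run_service"  # qualname of the run func
service_qualname = "my_pkg.services.MyService"     # qualname of the service
service_port = 6000

cmd = ("python -c 'import %s; import %s; %s(\"%s\", %d)'" %
       (srf_module, sc_module, service_run_func, service_qualname,
        service_port))
print(cmd)
# -> python -c 'import my_pkg.bootstrap; import my_pkg.services; my_pkg.bootstrap.run_service("my_pkg.services.MyService", 6000)'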
Example #5
produced during a run.
"""

from __future__ import print_function

import argparse

import combtest.encode as encode
import combtest.runner as runner
import combtest.utils as utils
import combtest.walk as walk

ACCEPTED_COMMANDS = ('step', 'replay')

DEFAULT_REPLAY_FUNC = runner.replay_walk
DEFAULT_REPLAY_FUNC_NAME = utils.get_class_qualname(DEFAULT_REPLAY_FUNC)

_COMMAND_HELP = {
    'step': 'Replay, one step at a time',
    'replay': 'Replay the walk'
}
COMMAND_HELP = "\n".join(["%s: %s" % (k, v) for k, v in _COMMAND_HELP.items()])


def load_from_trace(trace_file, walk_id):
    """
    Load a ``Walk`` from a trace file.

    :param str trace_file: a path to the trace file
    :param int walk_id: a ``walk_id`` which appears in the trace file
    """
Example #6
    def to_json(self):
        return (self._options, utils.get_class_qualname(self._action_class))
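
The tuple above pairs the options with the action class's qualname, which suggests the decode path goes back through ``get_class_from_qualname`` (seen in the other examples). The ``from_json`` classmethod below is a hedged, hypothetical sketch of that round trip, not necessarily the library's actual decoder:

    @classmethod
    def from_json(cls, payload):
        # Hypothetical inverse of to_json(): rebuild the action class from its
        # qualname and re-attach the serialized options. The constructor
        # signature used here is assumed for illustration.
        options, action_qualname = payload
        action_class = utils.get_class_from_qualname(action_qualname)
        return cls(options, action_class)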
Example #7
def run_tests(
    walk_order,
    state=None,
    verbose=1,
    logger_port=None,
    runner_class=MultistageWalkRunningService,
    service_group_class=ContinuingWalkServiceGroup,
    service_infos=None,
    service_handler_class=bootstrap.ServiceHandler_Local,
    max_thread_count=None,
    gather_states=False,
    log_dir=None,
):
    """
    Run a collection of :class:`combtest.walk.Walk`. This should be the main
    way to execute ``Walks`` for most users. This is the only interface that
    supports correct execution of a :class:`combtest.action.SerialAction`.

    You can provide some instance to serve as the state passed around during
    the tests. There are two important details to know about this:

    * The state must be JSON-ifiable, but py-combtest provides a convenience
      pattern to help with that. See :func:`encode`.
    * Shallow copies of the state will be made, via ``copy.copy()``, since each
      test owns its own copy. You may want to e.g. override ``__copy__`` if
      the details of the copy are important to you.

    :param iterable walk_order: An iterable of iterables which produce
                                :class:`combtest.action.Action`. Example: a
                                list of iterables produced by
                                ``MyActionClass.get_option_set()``.
    :param object state: a state to copy and pass to the ``Walks`` when we
                         execute them.
    :param int verbose: 0-2 verbosity setting. At 2, an additional
                        verbose-level log will be produced.
    :param int logger_port: the port number where our local logger should
                            accept data.
    :param combtest.worker.CoordinatorService runner_class: the type of Walk
                                                            execution service
                                                            to use.
    :param combtest.worker.ServiceGroup service_group_class: the type of
                                                             ``ServiceGroup``
                                                             we will use to
                                                             coordinate remote
                                                             executors
    :param iterable service_infos: An iterable of any extra infos we need to
                                   bootstrap the remote services. See
                                   :class:`combtest.bootstrap.ServiceHandleArray`.
    :param combtest.bootstrap.ServiceHandler service_handler_class: Type of
                        ``ServiceHandler`` to use to bootstrap the services.
    :param bool gather_states: if True or 1, gather and return all ``states``
                               from the remote services at the end of the run,
                               as a mapping ip->[state, ...]. If 2, also gather
                               extra info about the run of each walk, such as
                               whether it was canceled. Otherwise the returned
                               states will be None.
    :param int max_thread_count: Max number of ``Walk`` executing threads that
                                 each service will use.
    :param str log_dir: Directory where we will store traces, debug logs, etc.
                        Remote services will also attempt to store logs to
                        the same path.

    :raises RuntimeError: when remote services can't be established and
                          connected to.
    :return: count of walks run, count of walk execution errors, count of walk
             segments run, total elapsed time, remote states if
             ``gather_states`` is set else None, and the location of the master
             log file, where applicable.
    """

    if logger_port is None:
        logger_port = config.get_logger_port()

    if verbose == 0:
        central_logger.set_level(central_logger.WARNING)
    elif verbose == 1:
        central_logger.set_level(central_logger.INFO)
    else:
        central_logger.set_level(central_logger.DEBUG)

    my_ip = utils.get_my_IP()

    if log_dir is not None:
        central_logger.log_status("Log files will be at: %s", log_dir)

        # Used to give us some data that connects us back to the remote
        # workers, e.g. where their logs are being stored.
        central_logger.add_op_trace(log_dir, central_logger.OpTracer)
        central_logger.log_status("Log master at: %s", logger.op_trace.fname)
        # TODO? Pull files back from remote side via rpyc?

    # Set up remote logging w/local printing
    central_logger.start_recv_remote_logs(my_ip, logger_port)

    sg = None

    try:
        # Get the test case generator.
        wo = walk.WalkOptions(walk_order)

        # Bring up services across the cluster which can execute Walks in parallel.
        # See worker.py docs on the wiki for details about how this works.
        service_qualname = utils.get_class_qualname(runner_class)
        central_logger.log_status("Bringing up services to run some tests")
        sg = service_group_class(service_qualname,
                                 service_infos=service_infos,
                                 service_handler_class=service_handler_class)

        remote_log_locations = sg.start_remote_logging(my_ip,
                                                       logger_port,
                                                       log_dir,
                                                       verbose=verbose)

        master_location = ""

        remote_logs = []
        for logs in remote_log_locations.values():
            remote_logs.extend(logs)
        if any(remote_logs):
            logger.trace_op(id='master')
            for ip, log_locations in remote_log_locations.items():
                logger.trace_op(ip=ip, logs=log_locations)
            master_location = logger.op_trace.fname
        master_log = {
            'master': master_location,
            'remote': remote_log_locations
        }

        logger.info("Services are up")

        logger.info("Scattering work")
        start_time = time.time()

        master_worker_ids = {}
        for epoch_list in wo:
            logger.info("Epoch list has %d epochs", len(epoch_list))
            for epoch in epoch_list:
                state_copy = copy.copy(state)
                if epoch.serial_action is not None:
                    for branch_id in epoch.branch_ids:
                        state_copy = epoch.serial_action(
                            state=state_copy,
                            branch_id=branch_id,
                            epoch=epoch,
                            service=sg,
                            worker_ids=master_worker_ids)

                _, count, worker_ids = sg.scatter_work(
                    epoch, state=state_copy, max_thread_count=max_thread_count)
                logger.info("Epoch of work sent; %d work items", count)

                for connection_info, ids in worker_ids.items():
                    if connection_info not in master_worker_ids:
                        master_worker_ids[connection_info] = []
                    master_worker_ids[connection_info].extend(ids)

            logger.info("Epochs started; waiting for " "them to finish")
            sg.join()

        logger.info("Work finished; gathering responses")

        segment_count = 0
        error_count = 0
        walk_count = 0
        # List of walk_ids
        failed_tests = []
        for connection_info, ids in master_worker_ids.items():
            if len(ids) == 0:
                # No work sent, e.g. because we didn't have many walks
                continue

            # NOTE: taking advantage of singleton
            wid = ids[0]
            wids = {
                connection_info: [
                    wid,
                ]
            }
            current_segment_count, current_error_count, \
                    current_walk_count, current_failed_walk_ids = \
                    sg.gather_all_runner_states(wids)
            segment_count += current_segment_count
            error_count += current_error_count
            walk_count += current_walk_count
            failed_tests.extend(current_failed_walk_ids)

        elapsed = time.time() - start_time
        central_logger.log_status("Ran %d walks (%d errors) in %0.2fs" %
                                  (walk_count, error_count, elapsed))

        if gather_states is True or gather_states == 1:
            states_out = sg.gather_all_states(worker_ids, full=False)
        elif gather_states == 2:
            states_out = sg.gather_all_states(worker_ids, full=True)
        else:
            states_out = None

        if log_dir is not None:
            sg.provide_logs(log_dir)
    finally:
        central_logger.stop_recv_remote_logs()

        try:
            if sg is not None:
                sg.shutdown(hard=True)
        except Exception:
            pass

    return Result(walk_count, error_count, segment_count, elapsed, states_out,
                  master_log, failed_tests)
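
Finally, a hedged sketch of calling ``run_tests``. The ``ActionWriteFile`` and ``ActionVerifyFile`` classes are hypothetical ``combtest.action.Action`` subclasses; only their ``get_option_set()`` classmethod is relied on, exactly as the docstring's ``walk_order`` description suggests, and the state dict and paths are illustrative.

# Hypothetical Action subclasses; walk_order is built from their option sets
# as the docstring describes.
walk_order = [
    ActionWriteFile.get_option_set(),
    ActionVerifyFile.get_option_set(),
]

result = run_tests(walk_order,
                   state={'scratch_dir': '/tmp/combtest'},
                   verbose=1,
                   gather_states=True,
                   log_dir='/tmp/combtest/logs')
# `result` is the Result built at the end of run_tests: walk count, error
# count, segment count, elapsed time, gathered per-IP states, master log
# locations, and the ids of any failed walks.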