def start_cmd(self, service_class, service_run_func=DEFAULT_SERVICE_RUN_FUNC):
    if isinstance(service_run_func, string_types):
        service_run_func = utils.get_class_from_qualname(service_run_func)

    if not isinstance(service_class, string_types):
        service_qualname = utils.get_class_qualname(service_class)
    else:
        service_qualname = service_class

    proc = multiprocessing.Process(target=service_run_func,
                                   args=(service_qualname,
                                         self.service_port,
                                        ))
    # NOTE: even daemonized it will still share stdout/err with this proc.
    # That is a good thing imho, since we will see tracebacks. SSH-based
    # services will not enjoy the same advantage by default.
    proc.daemon = True
    proc.start()

    # TODO: make sure we didn't die quickly? But how long should we wait to
    # find out? And what if we die during executing a handler? A
    # ping-response pattern to check aliveness seems like crazy overkill for
    # a lib like this, but not hard.
    #
    # If the user set up remote logging, they will probably see the
    # traceback for a death during execution. But they won't see a start-up
    # death, since they won't have called in to set up remote logging yet.
    # Up to them to do logging to file? That seems fair for now.
    self._proc = proc
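# A minimal sketch of a custom ``service_run_func`` compatible with the call
# above; the only contract taken from the code is that start_cmd invokes it
# as func(service_qualname, service_port). ``serve_forever`` is a
# hypothetical entry point, not the library's DEFAULT_SERVICE_RUN_FUNC.
def example_service_run_func(service_qualname, service_port):
    # Resolve the class from its qualified name, mirroring the utils helper
    # used above, then run it on the requested port.
    service_class = utils.get_class_from_qualname(service_qualname)
    service_class.serve_forever(port=service_port)  # hypothetical API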
def default(self, obj):
    if hasattr(obj, 'to_json'):
        qualname = get_class_qualname(obj.__class__)
        qualname = _DEFAULT_MAGIC + qualname
        return {qualname: obj.to_json()}
    raise TypeError("Cannot figure out how to JSONify: %s" % repr(obj))
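# Illustrative sketch of the to_json() protocol this default() hook relies
# on. ``MyEncoder`` stands in for whatever json.JSONEncoder subclass defines
# default() above, and ``Point`` is a made-up user class; only the
# hasattr(obj, 'to_json') convention comes from the code itself.
import json

class Point(object):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def to_json(self):
        # Return something natively JSON-ifiable
        return {'x': self.x, 'y': self.y}

# json.dumps(Point(1, 2), cls=MyEncoder) would then emit a one-key dict whose
# key is the magic-prefixed qualname, e.g.
#   {"<_DEFAULT_MAGIC>mymodule.Point": {"x": 1, "y": 2}}
# so a paired decoder can route the payload back to Point.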
def start_remote_services(service_class):
    """
    Simplest method to start some services remotely.

    :param class service_class: a descendant of :class:`CoordinatorService`
    :raises RuntimeError: on various issues with service start up
    :return: A ServiceGroup wrapping the started services
    """
    service_qualname = utils.get_class_qualname(service_class)
    return ServiceGroup(service_qualname)
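# A hedged usage sketch; ``MyWalkService`` is a hypothetical descendant of
# CoordinatorService, purely for illustration:
#
#   class MyWalkService(CoordinatorService):
#       pass
#
#   group = start_remote_services(MyWalkService)
#   try:
#       ...  # drive the services via the returned ServiceGroup
#   finally:
#       group.shutdown(hard=True)  # shutdown(hard=...) as used by run_tests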
def start_cmd(self, service_class,
              service_run_func=bootstrap.DEFAULT_SERVICE_RUN_FUNC):
    if isinstance(service_run_func, string_types):
        service_run_func = utils.get_class_from_qualname(service_run_func)
    srf_module = service_run_func.__module__
    service_run_func = utils.get_class_qualname(service_run_func)

    if not isinstance(service_class, string_types):
        service_qualname = utils.get_class_qualname(service_class)
    else:
        service_qualname = service_class
        service_class = utils.get_class_from_qualname(service_class)
    sc_module = service_class.__module__

    ci_in = {}
    username = None
    password = None
    rsakey = None
    service_info = self._service_info
    for key, value in service_info.items():
        if key == 'username_map':
            if value and self.service_ip in value:
                username = value[self.service_ip]
        elif key == 'username_override':
            if username is None:
                username = value
        elif key == 'password_map':
            if value and self.service_ip in value:
                password = value[self.service_ip]
        elif key == 'password_override':
            if password is None:
                password = value
        elif key == 'rsakey_map':
            if value and self.service_ip in value:
                rsakey = value[self.service_ip]
        elif key == 'rsakey_override':
            if rsakey is None:
                rsakey = value
        elif key not in ('password', 'username', 'rsakey'):
            ci_in[key] = value

    if ('username' in service_info and
            service_info['username'] is not None):
        username = service_info['username']
    if ('password' in service_info and
            service_info['password'] is not None):
        password = service_info['password']
    if ('rsakey' in service_info and
            service_info['rsakey'] is not None):
        rsakey = service_info['rsakey']

    self._ssh_spawn(self.service_ip,
                    username=username,
                    password=password,
                    rsakey=rsakey,
                    **ci_in)

    cmd = ("python -c 'import %s; import %s; %s(\"%s\", %d)'" %
           (srf_module, sc_module, service_run_func, service_qualname,
            self.service_port))
    self.cmd(cmd)
    logger.debug("Spawned SSH handles for services")
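# For illustration only: with a hypothetical service class
# ``mypkg.walks.MyWalkService``, a run func living in combtest.bootstrap, and
# a made-up port 6969, the cmd built above would look roughly like:
#
#   python -c 'import combtest.bootstrap; import mypkg.walks; \
#       combtest.bootstrap.<run_func_qualname>("mypkg.walks.MyWalkService", 6969)'
#
# i.e. the remote side re-imports both modules by name and then calls the run
# func with the service's qualname and the port to listen on.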
produced during a run.
"""
from __future__ import print_function

import argparse

import combtest.encode as encode
import combtest.runner as runner
import combtest.utils as utils
import combtest.walk as walk


ACCEPTED_COMMANDS = ('step', 'replay')

DEFAULT_REPLAY_FUNC = runner.replay_walk
DEFAULT_REPLAY_FUNC_NAME = utils.get_class_qualname(DEFAULT_REPLAY_FUNC)

_COMMAND_HELP = {
    'step': 'Replay, one step at a time',
    'replay': 'Replay the walk',
}
COMMAND_HELP = "\n".join(["%s: %s" % (k, v)
                          for k, v in _COMMAND_HELP.items()])


def load_from_trace(trace_file, walk_id):
    """
    Load a ``Walk`` from a trace file.

    :param str trace_file: a path to the trace file
    :param int walk_id: a ``walk_id`` which appears in the trace file
    """
def to_json(self):
    return (self._options, utils.get_class_qualname(self._action_class))
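# A hedged sketch of the decode counterpart to to_json() above. The
# constructor signature is an assumption made for illustration; only the
# (options, qualname) tuple shape comes from the code itself.
@classmethod
def from_json(cls, obj):
    options, action_qualname = obj
    action_class = utils.get_class_from_qualname(action_qualname)
    # Assumed ctor, mirroring the tuple produced by to_json()
    return cls(options, action_class)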
def run_tests(walk_order,
              state=None,
              verbose=1,
              logger_port=None,
              runner_class=MultistageWalkRunningService,
              service_group_class=ContinuingWalkServiceGroup,
              service_infos=None,
              service_handler_class=bootstrap.ServiceHandler_Local,
              max_thread_count=None,
              gather_states=False,
              log_dir=None,
             ):
    """
    Run a collection of :class:`combtest.walk.Walk`. This should be the main
    way to execute ``Walks`` for most users. This is the only interface that
    supports correct execution of a :class:`combtest.action.SerialAction`.

    You can provide some instance to serve as the state passed around during
    the tests. There are two important details to know about this:

    * The state must be JSON-ifiable, but py-combtest provides a convenience
      pattern to help with that. See :func:`encode`.
    * Shallow copies of the state will be made, via copy.copy(), since each
      test owns its own copy. You may want to e.g. override __copy__ if the
      details of the copy are important to you.

    :param iterable walk_order: An iterable of iterables which produce
        :class:`combtest.action.Action`. Example: a list of iterables
        produced by ``MyActionClass.get_option_set()``.
    :param object state: a state to copy and pass to the ``Walks`` when we
        execute them.
    :param int verbose: 0-2 verbosity setting. At 2, an additional
        verbose-level log will be produced.
    :param int logger_port: the port number where our local logger should
        accept data.
    :param combtest.worker.CoordinatorService runner_class: the type of
        Walk-execution service to use.
    :param combtest.worker.ServiceGroup service_group_class: the type of
        ``ServiceGroup`` we will use to coordinate remote executors.
    :param iterable service_infos: An iterable of any extra infos we need to
        bootstrap the remote services. See
        :class:`combtest.bootstrap.ServiceHandleArray`.
    :param combtest.bootstrap.ServiceHandler service_handler_class: Type of
        ``ServiceHandler`` to use to bootstrap the services.
    :param bool gather_states: If True or 1, gather and return all ``states``
        from the remote services at the end of the run, as a mapping
        ip->[state, ...]. If 2, also gather extra info about the run of each
        walk, such as whether it was canceled. Otherwise the returned states
        will be None.
    :param int max_thread_count: Max number of ``Walk``-executing threads
        that each service will use.
    :param str log_dir: Directory where we will store traces, debug logs,
        etc. Remote services will also attempt to store logs to the same
        path.
    :raises RuntimeError: when remote services can't be established and
        connected to.
    :return: count of walks run, count of walk execution errors, count of
        walk segments run, total elapsed time, remote states if
        ``gather_states`` is truthy else None, and the location of the
        master log file, where applicable.
    """
    if logger_port is None:
        logger_port = config.get_logger_port()

    if verbose == 0:
        central_logger.set_level(central_logger.WARNING)
    elif verbose == 1:
        central_logger.set_level(central_logger.INFO)
    else:
        central_logger.set_level(central_logger.DEBUG)

    my_ip = utils.get_my_IP()

    if log_dir is not None:
        central_logger.log_status("Log files will be at: %s", log_dir)
        # Used to give us some data that connects us back to the remote
        # workers, e.g. where their logs are being stored.
        central_logger.add_op_trace(log_dir, central_logger.OpTracer)
        central_logger.log_status("Log master at: %s", logger.op_trace.fname)
        # TODO? Pull files back from remote side via rpyc?

    # Set up remote logging w/ local printing
    central_logger.start_recv_remote_logs(my_ip, logger_port)

    sg = None
    try:
        # Get the test case generator.
        wo = walk.WalkOptions(walk_order)

        # Bring up services across the cluster which can execute Walks in
        # parallel. See worker.py docs on the wiki for details about how
        # this works.
        service_qualname = utils.get_class_qualname(runner_class)
        central_logger.log_status("Bringing up services to run some tests")
        sg = service_group_class(service_qualname,
                                 service_infos=service_infos,
                                 service_handler_class=service_handler_class)

        remote_log_locations = sg.start_remote_logging(my_ip,
                                                       logger_port,
                                                       log_dir,
                                                       verbose=verbose)
        master_location = ""
        remote_logs = []
        for logs in remote_log_locations.values():
            remote_logs.extend(logs)
        if any(remote_logs):
            logger.trace_op(id='master')
            for ip, log_locations in remote_log_locations.items():
                logger.trace_op(ip=ip, logs=log_locations)
            master_location = logger.op_trace.fname
        master_log = {'master': master_location,
                      'remote': remote_log_locations}

        logger.info("Services are up")

        logger.info("Scattering work")
        start_time = time.time()

        master_worker_ids = {}
        for epoch_list in wo:
            logger.info("Epoch list has %d epochs", len(epoch_list))
            for epoch in epoch_list:
                state_copy = copy.copy(state)

                if epoch.serial_action is not None:
                    for branch_id in epoch.branch_ids:
                        state_copy = epoch.serial_action(
                            state=state_copy,
                            branch_id=branch_id,
                            epoch=epoch,
                            service=sg,
                            worker_ids=master_worker_ids)

                _, count, worker_ids = sg.scatter_work(
                    epoch,
                    state=state_copy,
                    max_thread_count=max_thread_count)
                logger.info("Epoch of work sent; %d work items", count)

                for connection_info, ids in worker_ids.items():
                    if connection_info not in master_worker_ids:
                        master_worker_ids[connection_info] = []
                    master_worker_ids[connection_info].extend(ids)

            logger.info("Epochs started; waiting for them to finish")
            sg.join()

        logger.info("Work finished; gathering responses")

        segment_count = 0
        error_count = 0
        walk_count = 0
        # List of walk_ids
        failed_tests = []
        for connection_info, ids in master_worker_ids.items():
            if len(ids) == 0:
                # No work sent, e.g. because we didn't have many walks
                continue

            # NOTE: taking advantage of singleton
            wid = ids[0]
            wids = {connection_info: [wid, ]}
            current_segment_count, current_error_count, \
                current_walk_count, current_failed_walk_ids = \
                sg.gather_all_runner_states(wids)
            segment_count += current_segment_count
            error_count += current_error_count
            walk_count += current_walk_count
            failed_tests.extend(current_failed_walk_ids)

        elapsed = time.time() - start_time
        central_logger.log_status("Ran %d walks (%d errors) in %0.2fs" %
                                  (walk_count, error_count, elapsed))

        if gather_states is True or gather_states == 1:
            states_out = sg.gather_all_states(worker_ids, full=False)
        elif gather_states == 2:
            states_out = sg.gather_all_states(worker_ids, full=True)
        else:
            states_out = None

        if log_dir is not None:
            sg.provide_logs(log_dir)
    finally:
        central_logger.stop_recv_remote_logs()
        try:
            if sg is not None:
                sg.shutdown(hard=True)
        except Exception:
            pass

    return Result(walk_count, error_count, segment_count, elapsed,
                  states_out, master_log, failed_tests)
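# A minimal, hedged usage sketch for run_tests. ``ActionA``/``ActionB`` are
# hypothetical combtest.action.Action subclasses; get_option_set() is the
# generator named in the docstring above, and the Result field names are
# assumed to match the positional args passed to Result at the end of
# run_tests:
#
#   walk_order = [ActionA.get_option_set(), ActionB.get_option_set()]
#   result = run_tests(walk_order,
#                      state={'count': 0},    # must be JSON-ifiable
#                      gather_states=True,    # returns ip -> [state, ...]
#                      log_dir='/tmp/combtest_logs')
#   print("%d walks, %d errors" % (result.walk_count, result.error_count))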