Exemplo n.º 1
0
def race(cfg, kill_running_processes=False):
    """
    Make sure no other Rally process is in the way and then run race control inside an actor system.

    :param cfg: Config object that is handed on to ``racecontrol.run``.
    :param kill_running_processes: If ``True``, already running Rally instances are killed (best effort) before
        continuing. If ``False`` (the default), the race is refused when other Rally processes are found.
    :raises exceptions.UserInterrupted: If the user interrupts while running Rally instances are being terminated.
    :raises exceptions.RallyError: If other Rally processes are running and ``kill_running_processes`` is ``False``.
    """
    log = logging.getLogger(__name__)

    if not kill_running_processes:
        # Only one benchmark may run at a time: refuse to start and tell the user how to proceed.
        competing = process.find_all_other_rally_processes()
        if competing:
            pids = [proc.pid for proc in competing]
            raise exceptions.RallyError(
                f"There are other Rally processes running on this machine (PIDs: {pids}) but only one Rally "
                f"benchmark is allowed to run at the same time.\n\nYou can use --kill-running-processes flag "
                f"to kill running processes automatically and allow Rally to continue to run a new benchmark. "
                f"Otherwise, you need to manually kill them."
            )
    else:
        log.info("Killing running Rally processes")

        # Kill any lingering Rally processes before attempting to continue - the actor system needs to be a singleton on this machine
        # noinspection PyBroadException
        try:
            process.kill_running_rally_instances()
        except KeyboardInterrupt:
            raise exceptions.UserInterrupted(
                "User has cancelled the benchmark whilst terminating Rally instances."
            ) from None
        except BaseException:
            # Deliberately best effort: record the problem and carry on with the race.
            log.exception(
                "Could not terminate potentially running Rally instances correctly. Attempting to go on anyway."
            )

    with_actor_system(racecontrol.run, cfg)
Exemplo n.º 2
0
def race(cfg, sources=False, distribution=False, external=False, docker=False):
    """
    Send a ``Setup`` message to a ``BenchmarkActor`` on the coordinator and wait for the benchmark's outcome.

    The benchmark actor is always told to exit afterwards, regardless of the outcome.

    :param cfg: Config object that is passed on in the ``Setup`` message.
    :param sources: Passed on unchanged in the ``Setup`` message.
    :param distribution: Passed on unchanged in the ``Setup`` message.
    :param external: Passed on unchanged in the ``Setup`` message.
    :param docker: Passed on unchanged in the ``Setup`` message.
    :raises exceptions.RallyError: If the actor reports a benchmark failure or answers with an unexpected result.
    :raises exceptions.UserInterrupted: If the user interrupts while waiting for the result.
    """
    log = logging.getLogger(__name__)
    # at this point an actor system has to run and we should only join
    system = actor.bootstrap_actor_system(try_join=True)
    coordinator = system.createActor(
        BenchmarkActor, targetActorRequirements={"coordinator": True}
    )
    try:
        setup_message = Setup(cfg, sources, distribution, external, docker)
        outcome = system.ask(coordinator, setup_message)

        if isinstance(outcome, Success):
            log.info("Benchmark has finished successfully.")
            return
        # may happen if one of the load generators has detected that the user has cancelled the benchmark.
        if isinstance(outcome, actor.BenchmarkCancelled):
            log.info("User has cancelled the benchmark (detected by actor).")
            return
        if isinstance(outcome, actor.BenchmarkFailure):
            log.error("A benchmark failure has occurred")
            raise exceptions.RallyError(outcome.message, outcome.cause)
        raise exceptions.RallyError(
            "Got an unexpected result during benchmarking: [%s]." % str(outcome)
        )
    except KeyboardInterrupt:
        cancelled = "User has cancelled the benchmark (detected by race control)."
        log.info(cancelled)
        # notify the coordinator so it can properly handle this state. Do it blocking so we don't have a race between this message
        # and the actor exit request.
        system.ask(coordinator, actor.BenchmarkCancelled())
        raise exceptions.UserInterrupted(cancelled) from None
    finally:
        # Runs on every path out of the try block, including the early returns above.
        log.info("Telling benchmark actor to exit.")
        system.tell(coordinator, thespian.actors.ActorExitRequest())
Exemplo n.º 3
0
def run(cfg):
    """
    Determine which pipeline to use for this race and execute it.

    The pipeline name is read from the ``race``/``pipeline`` setting. If it is empty, a pipeline is derived
    (``from-distribution`` when ``mechanic``/``distribution.version`` exists, otherwise ``from-sources``) and
    written back to the config.

    :param cfg: Config object; it is read here and passed to the selected pipeline.
    :raises exceptions.SystemSetupError: If the pipeline is unknown, or if ``RALLY_RUNNING_IN_DOCKER`` is set to
        "true" (case-insensitive) and the pipeline is not ``benchmark-only``.
    :raises exceptions.UserInterrupted: If the user interrupts the running pipeline.
    :raises exceptions.RallyError: Passed on unchanged from the pipeline, or raised for any other exception that
        escapes it.
    """
    log = logging.getLogger(__name__)
    name = cfg.opts("race", "pipeline")
    race_id = cfg.opts("system", "race.id")
    console.info(f"Race id is [{race_id}]", logger=log)

    if len(name) > 0:
        log.info("User specified pipeline [%s].", name)
    else:
        # assume from-distribution pipeline if distribution.version has been specified and --pipeline cli arg not set
        name = (
            "from-distribution"
            if cfg.exists("mechanic", "distribution.version")
            else "from-sources"
        )
        log.info(
            "User specified no pipeline. Automatically derived pipeline [%s].", name
        )
        cfg.add(config.Scope.applicationOverride, "race", "pipeline", name)

    in_docker = os.environ.get("RALLY_RUNNING_IN_DOCKER", "").upper() == "TRUE"
    # in this case only benchmarking remote Elasticsearch clusters makes sense
    if in_docker and name != "benchmark-only":
        raise exceptions.SystemSetupError(
            "Only the [benchmark-only] pipeline is supported by the Rally Docker image.\n"
            "Add --pipeline=benchmark-only in your Rally arguments and try again.\n"
            "For more details read the docs for the benchmark-only pipeline in {}\n"
            .format(doc_link("pipelines.html#benchmark-only")))

    try:
        selected = pipelines[name]
    except KeyError:
        raise exceptions.SystemSetupError(
            "Unknown pipeline [%s]. List the available pipelines with %s list pipelines."
            % (name, PROGRAM_NAME))

    try:
        selected(cfg)
    except exceptions.RallyError as e:
        # just pass on our own errors. It should be treated differently on top-level
        raise e
    except KeyboardInterrupt:
        log.info("User has cancelled the benchmark.")
        raise exceptions.UserInterrupted(
            "User has cancelled the benchmark (detected by race control)."
        ) from None
    except BaseException as crash:
        # Anything else becomes a RallyError that carries the traceback of the original failure.
        raise exceptions.RallyError(
            "This race ended with a fatal crash."
        ).with_traceback(crash.__traceback__)
Exemplo n.º 4
0
def with_actor_system(runnable, cfg):
    """
    Bootstrap an actor system, invoke ``runnable(cfg)`` and afterwards shut the actor system down again.

    If an actor system is already running locally it is joined and left running afterwards. Otherwise a new one
    is started and shut down once ``runnable`` has returned or raised. If bootstrapping fails with an invalid
    actor address or because no external socket address could be determined, the offline actor system is used
    instead.

    :param runnable: Callable that is invoked as ``runnable(cfg)`` once the actor system is available.
    :param cfg: Config object. ``system``/``remote.benchmarking.supported`` is added to it when the regular
        bootstrap succeeds (it is not set on the offline fallback paths).
    :raises exceptions.UserInterrupted: If the user interrupts bootstrapping, or if the shutdown did not complete
        and the user interrupted it at least once.
    """
    logger = logging.getLogger(__name__)
    already_running = actor.actor_system_already_running()
    logger.info("Actor system already running locally? [%s]",
                str(already_running))
    try:
        actors = actor.bootstrap_actor_system(
            try_join=already_running, prefer_local_only=not already_running)
        # We can only support remote benchmarks if we have a dedicated daemon that is not only bound to 127.0.0.1
        cfg.add(config.Scope.application, "system",
                "remote.benchmarking.supported", already_running)
    # This happens when the admin process could not be started, e.g. because it could not open a socket.
    except thespian.actors.InvalidActorAddress:
        logger.info("Falling back to offline actor system.")
        actor.use_offline_actor_system()
        actors = actor.bootstrap_actor_system(try_join=True)
    except KeyboardInterrupt:
        raise exceptions.UserInterrupted(
            "User has cancelled the benchmark (detected whilst bootstrapping actor system)."
        ) from None
    except Exception as e:
        logger.exception("Could not bootstrap actor system.")
        # Only this specific failure is recoverable; everything else is re-raised unchanged.
        if str(e) == "Unable to determine valid external socket address.":
            console.warn(
                "Could not determine a socket address. Are you running without any network? Switching to degraded mode.",
                logger=logger)
            logger.info("Falling back to offline actor system.")
            actor.use_offline_actor_system()
            actors = actor.bootstrap_actor_system(try_join=True)
        else:
            raise
    try:
        runnable(cfg)
    finally:
        # We only shutdown the actor system if it was not already running before
        if not already_running:
            shutdown_complete = False
            times_interrupted = 0
            # Retry the shutdown after a user interrupt, but give up after the second interrupt.
            while not shutdown_complete and times_interrupted < 2:
                try:
                    # give some time for any outstanding messages to be delivered to the actor system
                    time.sleep(3)
                    logger.info(
                        "Attempting to shutdown internal actor system.")
                    actors.shutdown()
                    # note that this check will only evaluate to True for a TCP-based actor system.
                    timeout = 15
                    # Keep the result of every liveness check so that the final one (made when the timeout has
                    # just expired) decides the outcome. Otherwise an actor system that stops during the last
                    # second of waiting would wrongly be reported as still running.
                    still_running = actor.actor_system_already_running()
                    while still_running and timeout > 0:
                        logger.info(
                            "Actor system is still running. Waiting...")
                        time.sleep(1)
                        timeout -= 1
                        still_running = actor.actor_system_already_running()
                    if not still_running:
                        shutdown_complete = True
                        logger.info("Shutdown completed.")
                    else:
                        logger.warning(
                            "Shutdown timed out. Actor system is still running."
                        )
                        break
                except KeyboardInterrupt:
                    times_interrupted += 1
                    logger.warning(
                        "User interrupted shutdown of internal actor system.")
                    console.info(
                        "Please wait a moment for Rally's internal components to shutdown."
                    )
            if not shutdown_complete and times_interrupted > 0:
                logger.warning(
                    "Terminating after user has interrupted actor system shutdown explicitly for [%d] times.",
                    times_interrupted)
                console.println("")
                console.warn(
                    "Terminating now at the risk of leaving child processes behind."
                )
                console.println("")
                console.warn(
                    "The next race may fail due to an unclean shutdown.")
                console.println("")
                console.println(SKULL)
                console.println("")
                # Raising from within ``finally`` replaces any exception raised by ``runnable``.
                raise exceptions.UserInterrupted(
                    f"User has cancelled the benchmark (shutdown not complete as user interrupted "
                    f"{times_interrupted} times).") from None
            elif not shutdown_complete:
                console.warn(
                    "Could not terminate all internal processes within timeout. Please check and force-terminate all Rally processes."
                )