示例#1
0
def stop(raise_errors=True):
    """Shut down the running actor system and block until it is no longer detected.

    :param raise_errors: If ``True``, an error during shutdown is re-raised and a missing actor system terminates
                         the process with exit code 1. If ``False``, a missing actor system is ignored and shutdown
                         errors are only reported on the console.
    """
    if not actor.actor_system_already_running():
        if raise_errors:
            console.error(
                "Could not shut down actor system: Actor system is not running.")
            sys.exit(1)
        return

    # The broad handler is intentional: every failure is reported on the console first.
    # noinspection PyBroadException
    try:
        # Thespian logs a warning on startup (seen at least on Mac OS X) when it cannot resolve address info, e.g.:
        #
        # WARNING:root:Unable to get address info for address 103.1.168.192.in-addr.arpa (AddressFamily.AF_INET,\
        # SocketKind.SOCK_DGRAM, 17, 0): <class 'socket.gaierror'> [Errno 8] nodename nor servname provided, or not known
        #
        # To keep the output clean only errors are shown.
        logging.basicConfig(level=logging.ERROR)
        actor.bootstrap_actor_system(try_join=True).shutdown()
        console.info("Shutting down actor system.", end="", flush=True)
        # Poll once per second and print a progress dot until the system is gone.
        while actor.actor_system_already_running():
            console.println(".", end="", flush=True)
            time.sleep(1)
        console.println(" [OK]")
    except BaseException:
        console.error("Could not shut down actor system.")
        if raise_errors:
            # propagate the original error so the user can see it
            raise
示例#2
0
def stop(raise_errors=True):
    """Stop the actor system if one is running and wait for it to terminate.

    :param raise_errors: Controls strictness. When ``True`` a failing shutdown re-raises the error and a missing
                         actor system exits the process with status 1; when ``False`` neither of them is fatal.
    """
    system_is_up = actor.actor_system_already_running()
    if not system_is_up:
        if not raise_errors:
            return
        console.error("Could not shut down actor system: Actor system is not running.")
        sys.exit(1)

    try:
        # Thespian prints a warning like the following on start (observed at least on Mac OS X):
        #
        # WARNING:root:Unable to get address info for address 103.1.168.192.in-addr.arpa (AddressFamily.AF_INET,\
        # SocketKind.SOCK_DGRAM, 17, 0): <class 'socket.gaierror'> [Errno 8] nodename nor servname provided, or not known
        #
        # Hence the log level is raised so that only errors are shown.
        logging.basicConfig(level=logging.ERROR)
        joined_system = actor.bootstrap_actor_system(try_join=True)
        joined_system.shutdown()
        # wait for termination, one progress dot per second
        console.info("Shutting down actor system.", end="", flush=True)
        while actor.actor_system_already_running():
            console.println(".", end="", flush=True)
            time.sleep(1)
        console.println(" [OK]")
    except BaseException:
        console.error("Could not shut down actor system.")
        if not raise_errors:
            return
        # re-raise so the user can see the underlying error
        raise
示例#3
0
    def on_start_engine(self, msg, sender):
        """Create one node mechanic actor per target host and send each of them its start message.

        The sender is remembered as race control, the config and a metrics store are set up from ``msg`` and the
        status is set to ``"starting"`` before the start messages are sent.

        :param msg: Start message; provides ``cfg``, ``open_metrics_context``, ``external`` and ``for_nodes()``.
        :param sender: Address of the actor that requested the start (stored as ``self.race_control``).
        :raises exceptions.LaunchError: If no target hosts are configured.
        :raises exceptions.SystemSetupError: If a non-local host is targeted but remote benchmarking is not supported.
        """
        logger.info("Received signal from race control to start engine.")
        self.race_control = sender
        self.cfg = msg.cfg
        self.metrics_store = metrics.metrics_store_class(self.cfg)(self.cfg)
        self.metrics_store.open(ctx=msg.open_metrics_context)

        hosts = self.cfg.opts("client", "hosts")
        if len(hosts) == 0:
            raise exceptions.LaunchError("No target hosts are configured.")

        # All mechanics are created first. Start messages are only sent after every creation has succeeded.
        pending_starts = []
        if msg.external:
            logger.info("Cluster will not be provisioned by Rally.")
            # a single actor on the coordinator node (i.e. here) is sufficient for this special case
            mechanic = self.createActor(NodeMechanicActor, targetActorRequirements={"coordinator": True})
            self.children.append(mechanic)
            pending_starts.append((mechanic, msg.for_nodes(ip=hosts)))
        else:
            logger.info("Cluster consisting of %s will be provisioned by Rally." % hosts)
            all_ips_and_ports = to_ip_port(hosts)
            all_node_ips = extract_all_node_ips(all_ips_and_ports)
            for (ip, port), nodes in nodes_by_host(all_ips_and_ports).items():
                if ip == "127.0.0.1":
                    # local nodes are handled by a mechanic on the coordinator
                    mechanic = self.createActor(NodeMechanicActor, targetActorRequirements={"coordinator": True})
                    self.children.append(mechanic)
                    pending_starts.append((mechanic, msg.for_nodes(all_node_ips, ip, port, nodes)))
                    continue

                if not self.cfg.opts("system", "remote.benchmarking.supported"):
                    logger.error("User tried to benchmark against %s but no external Rally daemon has been started." % hosts)
                    raise exceptions.SystemSetupError("To benchmark remote hosts (e.g. %s) you need to start the Rally daemon "
                                                      "on each machine including this one." % ip)
                logger.info("Benchmarking against %s with external Rally daemon." % hosts)

                # Block until the actor system on the remote host is reachable. Progress is only framed with a
                # header and " [OK]" if it was not up on the first check.
                was_up = actor.actor_system_already_running(ip=ip)
                logger.info("Actor system on [%s] already running? [%s]" % (ip, str(was_up)))
                if not was_up:
                    console.println("Waiting for Rally daemon on [%s] " % ip, end="", flush=True)
                while not actor.actor_system_already_running(ip=ip):
                    console.println(".", end="", flush=True)
                    time.sleep(3)
                if not was_up:
                    console.println(" [OK]")
                mechanic = self.createActor(NodeMechanicActor, targetActorRequirements={"ip": ip})
                pending_starts.append((mechanic, msg.for_nodes(all_node_ips, ip, port, nodes)))
                self.children.append(mechanic)

        self.status = "starting"
        self.received_responses = []
        for target, start_message in pending_starts:
            self.send(target, start_message)
示例#4
0
    def on_start_engine(self, msg, sender):
        """Set up an in-memory metrics store, create the node mechanics and tell each of them to start.

        :param msg: Start message; provides ``cfg``, ``open_metrics_context``, ``external`` and ``for_nodes()``.
        :param sender: Address of the requesting actor; kept as ``self.race_control``.
        :raises exceptions.LaunchError: If the list of target hosts is empty.
        :raises exceptions.SystemSetupError: If a host other than 127.0.0.1 is targeted without remote benchmarking
                                             support.
        """
        logger.info("Received signal from race control to start engine.")
        self.race_control = sender
        self.cfg = msg.cfg
        self.metrics_store = metrics.InMemoryMetricsStore(self.cfg)
        self.metrics_store.open(ctx=msg.open_metrics_context)

        hosts = self.cfg.opts("client", "hosts")
        if len(hosts) == 0:
            raise exceptions.LaunchError("No target hosts are configured.")

        # Two phases: create every mechanic first, then (only if that succeeded) send the start messages.
        start_plan = []
        if msg.external:
            logger.info("Cluster will not be provisioned by Rally.")
            # one actor running on the coordinator node (i.e. here) covers this special case
            worker = self.createActor(NodeMechanicActor, targetActorRequirements={"coordinator": True})
            self.children.append(worker)
            start_plan.append((worker, msg.for_nodes(ip=hosts)))
        else:
            logger.info("Cluster consisting of %s will be provisioned by Rally." % hosts)
            all_ips_and_ports = to_ip_port(hosts)
            all_node_ips = extract_all_node_ips(all_ips_and_ports)
            for (ip, port), nodes in nodes_by_host(all_ips_and_ports).items():
                if ip == "127.0.0.1":
                    worker = self.createActor(NodeMechanicActor, targetActorRequirements={"coordinator": True})
                    self.children.append(worker)
                    start_plan.append((worker, msg.for_nodes(all_node_ips, ip, port, nodes)))
                    continue

                remote_supported = self.cfg.opts("system", "remote.benchmarking.supported")
                if not remote_supported:
                    logger.error("User tried to benchmark against %s but no external Rally daemon has been started." % hosts)
                    raise exceptions.SystemSetupError("To benchmark remote hosts (e.g. %s) you need to start the Rally daemon "
                                                      "on each machine including this one." % ip)
                logger.info("Benchmarking against %s with external Rally daemon." % hosts)

                # Wait (without a timeout) for the actor system on the remote host; the console output is only
                # framed with a header and " [OK]" when the very first check failed.
                initially_up = actor.actor_system_already_running(ip=ip)
                logger.info("Actor system on [%s] already running? [%s]" % (ip, str(initially_up)))
                if not initially_up:
                    console.println("Waiting for Rally daemon on [%s] " % ip, end="", flush=True)
                while not actor.actor_system_already_running(ip=ip):
                    console.println(".", end="", flush=True)
                    time.sleep(3)
                if not initially_up:
                    console.println(" [OK]")
                worker = self.createActor(NodeMechanicActor, targetActorRequirements={"ip": ip})
                start_plan.append((worker, msg.for_nodes(all_node_ips, ip, port, nodes)))
                self.children.append(worker)

        self.status = "starting"
        self.received_responses = []
        for worker, start_message in start_plan:
            self.send(worker, start_message)
示例#5
0
def with_actor_system(runnable, cfg):
    """Run ``runnable(cfg)`` with a bootstrapped actor system and shut the system down afterwards if it was started here.

    If an actor system is already running it is joined and left running afterwards. Otherwise a new one is
    bootstrapped and shut down in the ``finally`` block, waiting up to 15 seconds for it to disappear. The user may
    interrupt the shutdown; after the second interrupt the function gives up and prints a warning.

    :param runnable: Callable that is invoked as ``runnable(cfg)``.
    :param cfg: Config object. After a regular bootstrap the property ``system`` / ``remote.benchmarking.supported``
                is added to it.
    :raises RuntimeError: If bootstrapping fails with anything but "Unable to determine valid external socket address."
    """
    already_running = actor.actor_system_already_running()
    logger.info("Actor system already running locally? [%s]" % str(already_running))
    try:
        actors = actor.bootstrap_actor_system(try_join=already_running, prefer_local_only=not already_running)
        # We can only support remote benchmarks if we have a dedicated daemon that is not only bound to 127.0.0.1
        cfg.add(config.Scope.application, "system", "remote.benchmarking.supported", already_running)
    except RuntimeError as e:
        logger.exception("Could not bootstrap actor system.")
        if str(e) == "Unable to determine valid external socket address.":
            console.warn("Could not determine a socket address. Are you running without any network? Switching to degraded mode.",
                         logger=logger)
            # NOTE(review): "remote.benchmarking.supported" is not added to cfg on this path - confirm that is intended.
            actor.use_offline_actor_system()
            actors = actor.bootstrap_actor_system(try_join=True)
        else:
            raise
    try:
        runnable(cfg)
    finally:
        # We only shutdown the actor system if it was not already running before
        if not already_running:
            shutdown_complete = False
            times_interrupted = 0
            while not shutdown_complete and times_interrupted < 2:
                try:
                    logger.info("Attempting to shutdown internal actor system.")
                    actors.shutdown()
                    # note that this check will only evaluate to True for a TCP-based actor system.
                    timeout = 15
                    # Decide on the last observed state instead of the remaining time budget. Otherwise a system
                    # that stops during the final wait would be misreported as "timed out".
                    still_running = actor.actor_system_already_running()
                    while still_running and timeout > 0:
                        logger.info("Actor system is still running. Waiting...")
                        time.sleep(1)
                        timeout -= 1
                        still_running = actor.actor_system_already_running()
                    if not still_running:
                        shutdown_complete = True
                        logger.info("Shutdown completed.")
                    else:
                        logger.warning("Shutdown timed out. Actor system is still running.")
                        break
                except KeyboardInterrupt:
                    times_interrupted += 1
                    logger.warning("User interrupted shutdown of internal actor system.")
                    console.info("Please wait a moment for Rally's internal components to shutdown.")
            if not shutdown_complete and times_interrupted > 0:
                logger.warning("Terminating after user has interrupted actor system shutdown explicitly for [%d] times." % times_interrupted)
                console.println("")
                console.warn("Terminating now at the risk of leaving child processes behind.")
                console.println("")
                console.warn("The next race may fail due to an unclean shutdown.")
                console.println("")
                console.println(SKULL)
                console.println("")
            elif not shutdown_complete:
                console.warn("Could not terminate all internal processes within timeout. Please check and force-terminate all Rally processes.")
示例#6
0
def with_actor_system(runnable, cfg):
    """Execute ``runnable(cfg)`` inside an actor system and tear the system down again if this call created it.

    A pre-existing actor system is joined and never shut down here. A system bootstrapped by this call is shut down
    after ``runnable`` returns or raises; the shutdown is polled for at most 15 seconds and tolerates one user
    interrupt before giving up on the second one.

    :param runnable: Callable that is invoked as ``runnable(cfg)``.
    :param cfg: Config object. After a regular bootstrap the property ``system`` / ``remote.benchmarking.supported``
                is added to it.
    :raises RuntimeError: If bootstrapping fails with anything but "Unable to determine valid external socket address."
    """
    already_running = actor.actor_system_already_running()
    logger.info("Actor system already running locally? [%s]" % str(already_running))
    try:
        actors = actor.bootstrap_actor_system(try_join=already_running, prefer_local_only=not already_running)
        # We can only support remote benchmarks if we have a dedicated daemon that is not only bound to 127.0.0.1
        cfg.add(config.Scope.application, "system", "remote.benchmarking.supported", already_running)
    except RuntimeError as e:
        logger.exception("Could not bootstrap actor system.")
        if str(e) == "Unable to determine valid external socket address.":
            console.warn("Could not determine a socket address. Are you running without any network? Switching to degraded mode.",
                         logger=logger)
            # NOTE(review): "remote.benchmarking.supported" is not added to cfg on this path - confirm that is intended.
            actor.use_offline_actor_system()
            actors = actor.bootstrap_actor_system(try_join=True)
        else:
            raise
    try:
        runnable(cfg)
    finally:
        # We only shutdown the actor system if it was not already running before
        if not already_running:
            shutdown_complete = False
            times_interrupted = 0
            while not shutdown_complete and times_interrupted < 2:
                try:
                    logger.info("Attempting to shutdown internal actor system.")
                    actors.shutdown()
                    # note that this check will only evaluate to True for a TCP-based actor system.
                    timeout = 15
                    # Track the result of the most recent poll: success is "no longer running", not "time left".
                    # With the latter, a system that stops during the last second is wrongly reported as timed out.
                    still_running = actor.actor_system_already_running()
                    while still_running and timeout > 0:
                        logger.info("Actor system is still running. Waiting...")
                        time.sleep(1)
                        timeout -= 1
                        still_running = actor.actor_system_already_running()
                    if not still_running:
                        shutdown_complete = True
                        logger.info("Shutdown completed.")
                    else:
                        logger.warning("Shutdown timed out. Actor system is still running.")
                        break
                except KeyboardInterrupt:
                    times_interrupted += 1
                    logger.warning("User interrupted shutdown of internal actor system.")
                    console.info("Please wait a moment for Rally's internal components to shutdown.")
            if not shutdown_complete and times_interrupted > 0:
                logger.warning("Terminating after user has interrupted actor system shutdown explicitly for [%d] times." % times_interrupted)
                console.println("")
                console.warn("Terminating now at the risk of leaving child processes behind.")
                console.println("")
                console.warn("The next race may fail due to an unclean shutdown.")
                console.println("")
                console.println(SKULL)
                console.println("")
            elif not shutdown_complete:
                console.warn("Could not terminate all internal processes within timeout. Please check and force-terminate all Rally processes.")
示例#7
0
def start(args):
    """Bootstrap an actor system on this node unless one is already running.

    :param args: Object providing the attributes ``node_ip`` and ``coordinator_ip``.
    :raises exceptions.RallyError: If an actor system is already running.
    """
    if actor.actor_system_already_running():
        raise exceptions.RallyError("An actor system appears to be already running.")

    node_ip = args.node_ip
    coordinator_ip = args.coordinator_ip
    actor.bootstrap_actor_system(local_ip=node_ip, coordinator_ip=coordinator_ip)
    success_message = "Successfully started actor system on node [%s] with coordinator node IP [%s]." % (node_ip, coordinator_ip)
    console.info(success_message)
示例#8
0
文件: rallyd.py 项目: levylll/rally
def start(args):
    """Start the actor system on this node; refuses to start if one is already running.

    :param args: Object providing the attributes ``node_ip`` and ``coordinator_ip``.
    :raises exceptions.RallyError: If an actor system is already running.
    """
    if actor.actor_system_already_running():
        raise exceptions.RallyError("An actor system appears to be already running.")

    # Thespian logs a warning like the one below upon start (at least on Mac OS X):
    #
    # WARNING:root:Unable to get address info for address 103.1.168.192.in-addr.arpa (AddressFamily.AF_INET,\
    # SocketKind.SOCK_DGRAM, 17, 0): <class 'socket.gaierror'> [Errno 8] nodename nor servname provided, or not known
    #
    # To avoid confusing users, only errors are shown.
    logging.basicConfig(level=logging.ERROR)

    node_ip = args.node_ip
    coordinator_ip = args.coordinator_ip
    actor.bootstrap_actor_system(local_ip=node_ip, coordinator_ip=coordinator_ip)
    console.info("Successfully started actor system on node [%s] with coordinator node IP [%s]." % (node_ip, coordinator_ip))
示例#9
0
def start(args):
    """Bring up the actor system for this node and report success on the console.

    :param args: Object providing the attributes ``node_ip`` and ``coordinator_ip``.
    :raises exceptions.RallyError: If an actor system is already running.
    """
    system_is_up = actor.actor_system_already_running()
    if system_is_up:
        raise exceptions.RallyError("An actor system appears to be already running.")

    # On start Thespian writes the following warning (at least on Mac OS X):
    #
    # WARNING:root:Unable to get address info for address 103.1.168.192.in-addr.arpa (AddressFamily.AF_INET,\
    # SocketKind.SOCK_DGRAM, 17, 0): <class 'socket.gaierror'> [Errno 8] nodename nor servname provided, or not known
    #
    # That is why warnings are suppressed and only errors are shown.
    logging.basicConfig(level=logging.ERROR)

    local_ip, coordinator = args.node_ip, args.coordinator_ip
    actor.bootstrap_actor_system(local_ip=local_ip, coordinator_ip=coordinator)
    summary = "Successfully started actor system on node [%s] with coordinator node IP [%s]." % (local_ip, coordinator)
    console.info(summary)
示例#10
0
def status():
    """Print ``Running`` to the console if an actor system is detected and ``Stopped`` otherwise."""
    state = "Running" if actor.actor_system_already_running() else "Stopped"
    console.println(state)
示例#11
0
文件: mechanic.py 项目: levylll/rally
    def receiveMessage(self, msg, sender):
        """Dispatch an incoming actor message based on its type.

        ``StartEngine`` creates the node mechanic actors and sends each of them a start message. Messages
        coming back from the mechanics (``EngineStarted``, ``Success``, ``Failure``, ``BenchmarkStopped``,
        ``EngineStopped``) are forwarded to race control, while ``OnBenchmarkStart``, ``OnBenchmarkStop`` and
        ``StopEngine`` are fanned out to all mechanics. Message types that match no branch are ignored.

        Any exception raised while handling a message is logged and reported as a ``Failure`` message, either to
        race control (if the sender is one of our mechanics) or to the sender.

        :param msg: The received message.
        :param sender: Address of the actor that sent ``msg``.
        """
        try:
            logger.debug(
                "MechanicActor#receiveMessage(msg = [%s] sender = [%s])" %
                (str(type(msg)), str(sender)))
            if isinstance(msg, StartEngine):
                logger.info(
                    "Received signal from race control to start engine.")
                self.race_control = sender
                # In our startup procedure we first create all mechanics. Only if this succeeds, the start messages
                # are sent (see the loop at the end of this branch).
                mechanics_and_start_message = []

                if msg.external:
                    logger.info(
                        "Target node(s) will not be provisioned by Rally.")
                    # just create one actor for this special case and run it on the coordinator node (i.e. here)
                    m = self.createActor(
                        LocalNodeMechanicActor,
                        globalName="/rally/mechanic/worker/external",
                        targetActorRequirements={"coordinator": True})
                    self.mechanics.append(m)
                    # we can use the original message in this case
                    mechanics_and_start_message.append((m, msg))
                else:
                    hosts = msg.cfg.opts("client", "hosts")
                    logger.info(
                        "Target node(s) %s will be provisioned by Rally." %
                        hosts)
                    if len(hosts) == 0:
                        raise exceptions.LaunchError(
                            "No target hosts are configured.")
                    for host in hosts:
                        ip = host["host"]
                        port = int(host["port"])
                        # user may specify "localhost" on the command line but the problem is that we auto-register the actor system
                        # with "ip": "127.0.0.1" so we convert this special case automatically. In all other cases the user needs to
                        # start the actor system on the other host and is aware that the parameter for the actor system and the
                        # --target-hosts parameter need to match.
                        if ip == "localhost" or ip == "127.0.0.1":
                            m = self.createActor(
                                LocalNodeMechanicActor,
                                globalName="/rally/mechanic/worker/localhost",
                                targetActorRequirements={"coordinator": True})
                            self.mechanics.append(m)
                            mechanics_and_start_message.append(
                                (m, msg.with_port(port)))
                        else:
                            # remote hosts require that remote benchmarking support has been flagged in the config
                            if msg.cfg.opts("system",
                                            "remote.benchmarking.supported"):
                                logger.info(
                                    "Benchmarking against %s with external Rally daemon."
                                    % hosts)
                            else:
                                logger.error(
                                    "User tried to benchmark against %s but no external Rally daemon has been started."
                                    % hosts)
                                raise exceptions.SystemSetupError(
                                    "To benchmark remote hosts (e.g. %s) you need to start the Rally daemon "
                                    "on each machine including this one." % ip)
                            already_running = actor.actor_system_already_running(
                                ip=ip)
                            logger.info(
                                "Actor system on [%s] already running? [%s]" %
                                (ip, str(already_running)))
                            # the progress output is only framed with a header and " [OK]" if the first check failed
                            if not already_running:
                                console.println(
                                    "Waiting for Rally daemon on [%s] " % ip,
                                    end="",
                                    flush=True)
                            # poll every three seconds (without a timeout) until the remote actor system is detected
                            while not actor.actor_system_already_running(
                                    ip=ip):
                                console.println(".", end="", flush=True)
                                time.sleep(3)
                            if not already_running:
                                console.println(" [OK]")
                            m = self.createActor(
                                RemoteNodeMechanicActor,
                                globalName="/rally/mechanic/worker/%s" % ip,
                                targetActorRequirements={"ip": ip})
                            mechanics_and_start_message.append(
                                (m, msg.with_port(port)))
                            self.mechanics.append(m)
                for mechanic_actor, start_message in mechanics_and_start_message:
                    self.send(mechanic_actor, start_message)
            elif isinstance(msg, EngineStarted):
                self.send(self.race_control, msg)
            elif isinstance(msg, OnBenchmarkStart):
                for m in self.mechanics:
                    self.send(m, msg)
            elif isinstance(msg, Success):
                self.send(self.race_control, msg)
            elif isinstance(msg, Failure):
                self.send(self.race_control, msg)
            elif isinstance(msg, OnBenchmarkStop):
                for m in self.mechanics:
                    self.send(m, msg)
            elif isinstance(msg, BenchmarkStopped):
                # TODO dm: Actually we need to wait for all BenchmarkStopped messages from all our mechanic actors
                # TODO dm: We will actually duplicate cluster level metrics if each of our mechanic actors gathers these...
                self.send(self.race_control, msg)
            elif isinstance(msg, StopEngine):
                for m in self.mechanics:
                    self.send(m, msg)
            elif isinstance(msg, EngineStopped):
                self.send(self.race_control, msg)
                # clear all state as the mechanic might get reused later
                for m in self.mechanics:
                    self.send(m, thespian.actors.ActorExitRequest())
                self.mechanics = []
                # self terminate + slave nodes
                self.send(self.myAddress, thespian.actors.ActorExitRequest())
            elif isinstance(msg, thespian.actors.ChildActorExited):
                # TODO dm: Depending on our state model this can be fine (e.g. when it exited due to our ActorExitRequest message)
                # or it could be problematic and mean that an exception has occurred.
                pass
            elif isinstance(msg, thespian.actors.PoisonMessage):
                # something went wrong with a child actor
                if isinstance(msg.poisonMessage, StartEngine):
                    raise exceptions.LaunchError(
                        "Could not start benchmark candidate. Are Rally daemons on all targeted machines running?"
                    )
                else:
                    logger.error(
                        "[%s] sent to a child actor has resulted in PoisonMessage"
                        % str(msg.poisonMessage))
                    raise exceptions.RallyError(
                        "Could not communicate with benchmark candidate (unknown reason)"
                    )
        except BaseException:
            logger.exception("Cannot process message [%s]" % msg)
            # usually, we'll notify the sender but in case a child sent something that caused an exception we'd rather
            # have it bubble up to race control. Otherwise, we could play ping-pong with our child actor.
            recipient = self.race_control if sender in self.mechanics else sender
            # only ex_value is used below; the traceback is sent as formatted text instead of as an object
            ex_type, ex_value, ex_traceback = sys.exc_info()
            # avoid "can't pickle traceback objects"
            import traceback
            self.send(
                recipient,
                Failure("Could not execute command (%s)" % ex_value,
                        traceback.format_exc()))
示例#12
0
文件: rally.py 项目: elastic/rally
def with_actor_system(runnable, cfg):
    """Run ``runnable(cfg)`` with a bootstrapped actor system and shut it down afterwards if it was started here.

    If bootstrapping fails with ``InvalidActorAddress`` or with the error "Unable to determine valid external socket
    address." the offline actor system is used instead. An actor system that was already running before this call is
    joined and left running. Otherwise the system is shut down in the ``finally`` block, which waits up to 15
    seconds for it to disappear and tolerates one user interrupt.

    :param runnable: Callable that is invoked as ``runnable(cfg)``.
    :param cfg: Config object. After a regular bootstrap the property ``system`` / ``remote.benchmarking.supported``
                is added to it.
    :raises exceptions.UserInterrupted: If the user interrupts bootstrapping, or interrupts the shutdown so that it
                                        does not complete.
    """
    logger = logging.getLogger(__name__)
    already_running = actor.actor_system_already_running()
    logger.info("Actor system already running locally? [%s]",
                str(already_running))
    try:
        actors = actor.bootstrap_actor_system(
            try_join=already_running, prefer_local_only=not already_running)
        # We can only support remote benchmarks if we have a dedicated daemon that is not only bound to 127.0.0.1
        cfg.add(config.Scope.application, "system",
                "remote.benchmarking.supported", already_running)
    # This happens when the admin process could not be started, e.g. because it could not open a socket.
    except thespian.actors.InvalidActorAddress:
        logger.info("Falling back to offline actor system.")
        actor.use_offline_actor_system()
        actors = actor.bootstrap_actor_system(try_join=True)
    except KeyboardInterrupt:
        raise exceptions.UserInterrupted(
            "User has cancelled the benchmark (detected whilst bootstrapping actor system)."
        ) from None
    except Exception as e:
        logger.exception("Could not bootstrap actor system.")
        if str(e) == "Unable to determine valid external socket address.":
            console.warn(
                "Could not determine a socket address. Are you running without any network? Switching to degraded mode.",
                logger=logger)
            logger.info("Falling back to offline actor system.")
            actor.use_offline_actor_system()
            actors = actor.bootstrap_actor_system(try_join=True)
        else:
            raise
    try:
        runnable(cfg)
    finally:
        # We only shutdown the actor system if it was not already running before
        if not already_running:
            shutdown_complete = False
            times_interrupted = 0
            while not shutdown_complete and times_interrupted < 2:
                try:
                    # give some time for any outstanding messages to be delivered to the actor system
                    time.sleep(3)
                    logger.info(
                        "Attempting to shutdown internal actor system.")
                    actors.shutdown()
                    # note that this check will only evaluate to True for a TCP-based actor system.
                    timeout = 15
                    # Decide on the last observed state instead of the remaining time budget. Otherwise a system
                    # that stops during the final wait would be misreported as "timed out".
                    still_running = actor.actor_system_already_running()
                    while still_running and timeout > 0:
                        logger.info(
                            "Actor system is still running. Waiting...")
                        time.sleep(1)
                        timeout -= 1
                        still_running = actor.actor_system_already_running()
                    if not still_running:
                        shutdown_complete = True
                        logger.info("Shutdown completed.")
                    else:
                        logger.warning(
                            "Shutdown timed out. Actor system is still running."
                        )
                        break
                except KeyboardInterrupt:
                    times_interrupted += 1
                    logger.warning(
                        "User interrupted shutdown of internal actor system.")
                    console.info(
                        "Please wait a moment for Rally's internal components to shutdown."
                    )
            if not shutdown_complete and times_interrupted > 0:
                logger.warning(
                    "Terminating after user has interrupted actor system shutdown explicitly for [%d] times.",
                    times_interrupted)
                console.println("")
                console.warn(
                    "Terminating now at the risk of leaving child processes behind."
                )
                console.println("")
                console.warn(
                    "The next race may fail due to an unclean shutdown.")
                console.println("")
                console.println(SKULL)
                console.println("")
                raise exceptions.UserInterrupted(
                    f"User has cancelled the benchmark (shutdown not complete as user interrupted "
                    f"{times_interrupted} times).") from None
            elif not shutdown_complete:
                console.warn(
                    "Could not terminate all internal processes within timeout. Please check and force-terminate all Rally processes."
                )