def deploySession(self, sessionId, completedDrops=None):
    """
    Deploy the session `sessionId` and optionally trigger some of its drops.

    The steps are carried out in order through `self.replicate`:

    1. If relationship information is stored for the session in
       `self._drop_rels`, it is delivered first so that node managers can
       subscribe to the events published by other nodes.
    2. The session itself is deployed.
    3. If `completedDrops` is given and not empty, those drops are
       triggered, grouped by node via `group_by_node`.

    :param sessionId: identifier of the session to deploy.
    :param completedDrops: optional iterable with the UIDs of the drops
        that should be moved to COMPLETED once the session is deployed.
        `None` (the default) or an empty collection skips this step.
    :raises DaliugeException: if any UID in `completedDrops` is not present
        in `self._graph`.
    """

    # Indicate the node managers that they have to subscribe to events
    # published by some nodes
    drop_rels = self._drop_rels.get(sessionId, None)
    if drop_rels:
        self.replicate(sessionId, self._add_node_subscriptions,
                       "adding relationship information",
                       port=constants.NODE_DEFAULT_REST_PORT,
                       iterable=drop_rels.items())
        logger.info("Delivered node subscription list to node managers")

    logger.info('Deploying Session %s in all hosts', sessionId)
    self.replicate(sessionId, self._deploySession, "deploying session")
    logger.info('Successfully deployed session %s in all hosts', sessionId)

    # Now that everything is wired up we move the requested DROPs to COMPLETED
    # (instead of doing it at the DM-level deployment time, in which case
    # we would certainly miss most of the events)
    if completedDrops:
        not_found = set(completedDrops) - set(self._graph)
        if not_found:
            # Exceptions do not interpolate logging-style arguments, so the
            # message has to be formatted explicitly
            raise DaliugeException(
                "UIDs for completed drops not found: %r" % (not_found,))
        logger.info('Moving Drops to COMPLETED right away: %r', completedDrops)
        completed_by_host = group_by_node(completedDrops, self._graph)
        self.replicate(sessionId, self._triggerDrops, "triggering drops",
                       port=constants.NODE_DEFAULT_REST_PORT,
                       iterable=completed_by_host.items())
        logger.info('Successfully triggered drops')
def subscribe(self, host, port):
    """
    Request a ZMQ subscription to the events published at `host`:`port`
    and wait until it is confirmed.

    A subscription request for the ``tcp://host:port`` endpoint is put
    into `self._subscriptions` together with a `threading.Event`; this
    method then waits up to 5 seconds for that event to be set.
    (Presumably the consumer of `self._subscriptions` sets the event once
    the subscription is in place -- that code is not part of this method.)

    :param host: host name or address of the event publisher.
    :param port: TCP port (integer) of the event publisher.
    :raises DaliugeException: if the event is not set within the timeout.
    """
    wait_secs = 5
    subscribed = threading.Event()
    endpoint = "tcp://%s:%d" % (host, port)

    request = ZMQPubSubMixIn.subscription(endpoint, subscribed)
    self._subscriptions.put(request)

    if subscribed.wait(wait_secs):
        logger.info("Subscribed for events originating from %s", endpoint)
        return
    raise DaliugeException("ZMQ subscription not achieved within %d seconds" % (wait_secs,))
def get_drop_property(self, uid, prop_name):
    """
    Return the value of attribute `prop_name` of the drop registered
    under `uid` in `self._drops`.

    :param uid: key of the drop in `self._drops`.
    :param prop_name: name of the attribute to read from the drop.
    :raises NoDropException: if `uid` is not in `self._drops`.
    :raises DaliugeException: if reading the attribute raises
        `AttributeError`.
    """
    known = uid in self._drops
    if not known:
        raise NoDropException(uid)

    drop = self._drops[uid]
    try:
        value = getattr(drop, prop_name)
    except AttributeError:
        raise DaliugeException("%r has no property called %s" % (drop, prop_name))
    return value
def call_drop(self, uid, method, *args):
    """
    Call `method` with `args` on the drop registered under `uid` in
    `self._drops` and return whatever the call returns.

    :param uid: key of the drop in `self._drops`.
    :param method: name of the attribute to look up on the drop and call.
    :param args: positional arguments passed to the call.
    :raises NoDropException: if `uid` is not in `self._drops`.
    :raises DaliugeException: if looking up `method` on the drop raises
        `AttributeError`.
    """
    known = uid in self._drops
    if not known:
        raise NoDropException(uid)

    drop = self._drops[uid]
    try:
        target = getattr(drop, method)
    except AttributeError:
        raise DaliugeException("%r has no method called %s" % (drop, method))
    else:
        # The call itself happens outside the try block, so an AttributeError
        # raised by the method is not converted into a DaliugeException
        return target(*args)
def startDM(self, host):
    """
    Start a DM on `host` by running `self.subDMCommandLine(host)` remotely.

    A client is created with `remote.createClient` using
    `self._pkeyPath`, and the command is executed through it. A non-zero
    exit status is logged together with the command's stdout/stderr and
    turned into an exception.

    :param host: the host where the DM should be started.
    :raises DaliugeException: if the remote command exits with a non-zero
        status.
    """
    # NOTE(review): the client is not explicitly closed/released here;
    # confirm whether remote.createClient returns something that needs it.
    client = remote.createClient(host, pkeyPath=self._pkeyPath)
    cmdline = self.subDMCommandLine(host)
    out, err, status = remote.execRemoteWithClient(client, cmdline)

    if status == 0:
        logger.info("DM successfully started at %s:%d", host, self._dmPort)
        return

    logger.error(
        "Failed to start the DM on %s:%d, stdout/stderr follow:\n==STDOUT==\n%s\n==STDERR==\n%s" % (host, self._dmPort, out, err))
    raise DaliugeException("Failed to start the DM on %s:%d" % (host, self._dmPort))
def start_process(cmd, args, **subproc_args):
    """
    Start 'dlg cmd <args>' in a different process.

    The new process runs this module (``-m __name__``) with the current
    Python interpreter, followed by `cmd` and `args`.

    :param cmd: name of the command to run; it must be present in
        `commands`.
    :param args: list with the command-line arguments for the command.
    :param subproc_args: keyword arguments passed down to the process
        creation via `Popen`.
    :return: the new process, as returned by `subprocess.Popen`.
    :raises DaliugeException: if `cmd` is not a known command.
    """
    from dfms.exceptions import DaliugeException

    known = cmd in commands
    if not known:
        raise DaliugeException("Unknown command: %s" % (cmd, ))

    launcher = [sys.executable, '-m', __name__, cmd]
    cmdline = launcher + args
    logger.debug("Launching %s", cmdline)
    process = subprocess.Popen(cmdline, **subproc_args)
    return process
def ensureDM(self, host, port=None, timeout=10):
    """
    Make sure a DM can be reached at `host`:`port`, starting it if needed.

    If the port is already open nothing else is done. Otherwise the DM is
    started via `self.startDM(host)` and the port is checked once more.

    :param host: the host where the DM should be present.
    :param port: the port to check; a false value (the default `None`)
        means `self._dmPort`.
    :param timeout: value passed to `portIsOpen` on each check.
    :raises DaliugeException: if the port is still not open after the DM
        has been started.
    """
    if not port:
        port = self._dmPort

    logger.debug("Checking DM presence at %s:%d", host, port)
    dm_running = portIsOpen(host, port, timeout)
    if dm_running:
        logger.debug("DM already present at %s:%d", host, port)
    else:
        # We rely on having ssh keys for this, since we're using
        # the dfms.remote module, which authenticates using public keys
        logger.debug("DM not present at %s:%d, will start it now", host, port)
        self.startDM(host)

        # Wait a bit until the DM starts; if it doesn't we fail
        if not portIsOpen(host, port, timeout):
            raise DaliugeException(
                "DM started at %s:%d, but couldn't connect to it" % (host, port))
def setup_pyro(self):
    """
    Run the parent class' Pyro setup and then adjust the global Pyro4
    configuration: server type 'thread', a thread pool size of 16 and
    THREADPOOL_ALLOW_QUEUE disabled.
    """
    super(MultiplexPyroRPCMixIn, self).setup_pyro()
    import Pyro4
    # NOTE(review): this is invoked through MultiplexPyroRPCMixIn but selects
    # the 'thread' server type -- confirm this is the intended setting.
    pyro_config = Pyro4.config
    pyro_config.SERVERTYPE = 'thread'
    pyro_config.THREADPOOL_SIZE = 16
    pyro_config.THREADPOOL_ALLOW_QUEUE = False

# So far we currently support ZMQ only
EventMixIn = ZMQPubSubMixIn

# Check which rpc backend should be used; it is chosen via the DALIUGE_RPC
# environment variable, with 'zerorpc' used when the variable is not set
rpc_lib = os.environ.get('DALIUGE_RPC', 'zerorpc')
if rpc_lib == 'zerorpc':
    RpcMixIn = ZeroRPCMixIn
elif rpc_lib == 'rpyc':
    RpcMixIn = RPyCMixIn
elif rpc_lib == 'pyro-threaded':
    RpcMixIn = ThreadedPyroRPCMixIn
elif rpc_lib == 'pyro' or rpc_lib == 'pyro-multiplex':
    # "pyro" defaults to "pyro-multiplex"
    RpcMixIn = MultiplexPyroRPCMixIn
else:
    raise DaliugeException(
        "Unknown RPC lib %s, use one of pyro, pyro-multiplex, pyro-threaded, zerorpc, rpyc" % (rpc_lib, ))

# The concrete NodeManager combines the selected event and RPC mix-ins with
# the base node manager implementation
class NodeManager(EventMixIn, RpcMixIn, NodeManagerBase):
    pass