Example #1
0
    def __init__(self,
                 useDLM=True,
                 dfmsPath=None,
                 host=None,
                 error_listener=None,
                 enable_luigi=False,
                 events_port=constants.NODE_DEFAULT_EVENTS_PORT,
                 rpc_port=constants.NODE_DEFAULT_RPC_PORT,
                 max_threads=0):
        """
        Creates a new node manager.

        :param useDLM: if True, a DataLifecycleManager is created and DROPs
                       are registered with it at deployment time.
        :param dfmsPath: optional directory with user-provided DROP
                         application code; appended to ``sys.path`` if it
                         exists.
        :param host: hostname this manager identifies itself with; defaults
                     to 'localhost'.
        :param error_listener: either an object exposing an ``on_error``
                               method, or the fully-qualified name of a class
                               to import and instantiate.
        :param enable_luigi: forwarded to each Session created later.
        :param events_port: port used for inter-node event publication.
        :param rpc_port: port used for inter-node RPC.
        :param max_threads: size of the DROP thread pool; 0 disables the
                            pool, any other value is clamped to [1, 200].

        :raises ValueError: if ``error_listener`` has no ``on_error`` method.
        """
        self._dlm = DataLifecycleManager() if useDLM else None
        self._host = host or 'localhost'
        self._events_port = events_port
        self._rpc_port = rpc_port
        self._sessions = {}

        # dfmsPath contains code added by the user with possible
        # DROP applications
        if dfmsPath:
            dfmsPath = os.path.expanduser(dfmsPath)
            if os.path.isdir(dfmsPath):
                logger.info("Adding %s to the system path", dfmsPath)
                sys.path.append(dfmsPath)

        # Error listener used by users to deal with errors coming from specific
        # Drops in whatever way they want
        if error_listener:
            if isinstance(error_listener, six.string_types):
                try:
                    parts = error_listener.split('.')
                    module = importlib.import_module('.'.join(parts[:-1]))
                except Exception:
                    # Narrowed from a bare ``except:`` so SystemExit and
                    # KeyboardInterrupt are not logged as listener errors
                    logger.exception('Creating the error listener')
                    raise
                error_listener = getattr(module, parts[-1])()
            if not hasattr(error_listener, 'on_error'):
                raise ValueError(
                    "error_listener doesn't contain an on_error method")
        self._error_listener = error_listener

        self._enable_luigi = enable_luigi

        # Start our thread pool
        if max_threads == 0:
            self._threadpool = None
        else:
            # Clamp to a sane range to avoid an unbounded number of threads
            max_threads = max(min(max_threads, 200), 1)
            logger.info("Initializing thread pool with %d threads",
                        max_threads)
            self._threadpool = multiprocessing.pool.ThreadPool(
                processes=max_threads)

        # Event handler that only logs status changes
        debugging = logger.isEnabledFor(logging.DEBUG)
        self._logging_event_listener = LogEvtListener() if debugging else None

        # Start the mix-ins
        self.start()
Example #2
0
    def __init__(self, useDLM=True, dfmsPath=None, host=None, error_listener=None, enable_luigi=False):
        """
        Creates a new NodeManager.

        :param useDLM: if True, a DataLifecycleManager is created to manage
                       the DROPs produced by this manager's sessions.
        :param dfmsPath: optional directory with user-provided DROP
                         application code; appended to ``sys.path`` if it
                         exists.
        :param host: hostname forwarded to each created Session.
        :param error_listener: either an object exposing an ``on_error``
                               method, or the fully-qualified name of a class
                               to import and instantiate.
        :param enable_luigi: forwarded to each created Session.

        :raises ValueError: if ``error_listener`` has no ``on_error`` method.
        """
        self._dlm = DataLifecycleManager() if useDLM else None
        self._sessions = {}
        self._host = host

        # dfmsPath contains code added by the user with possible
        # DROP applications
        if dfmsPath:
            dfmsPath = os.path.expanduser(dfmsPath)
            if os.path.isdir(dfmsPath):
                # Lazy %-args: logging formats (and evaluates) only if the
                # INFO level is enabled, so no explicit guard is needed
                logger.info("Adding %s to the system path", dfmsPath)
                sys.path.append(dfmsPath)

        # Error listener used by users to deal with errors coming from specific
        # Drops in whatever way they want
        if error_listener:
            if isinstance(error_listener, basestring):
                try:
                    parts = error_listener.split(".")
                    module = importlib.import_module(".".join(parts[:-1]))
                except Exception:
                    # Narrowed from a bare ``except:`` so SystemExit and
                    # KeyboardInterrupt are not logged as listener errors
                    logger.exception("Creating the error listener")
                    raise
                error_listener = getattr(module, parts[-1])()
            if not hasattr(error_listener, "on_error"):
                raise ValueError("error_listener doesn't contain an on_error method")
        self._error_listener = error_listener

        self._enable_luigi = enable_luigi
Example #3
0
class NodeManagerBase(DROPManager):
    """
    Base class for a DROPManager that creates and holds references to DROPs.

    A NodeManagerBase is the ultimate responsible of handling DROPs. It does so not
    directly, but via Sessions, which represent and encapsulate separate,
    independent DROP graph executions. All DROPs created by the
    different Sessions are also given to a common DataLifecycleManager, which
    takes care of expiring them when needed and replicating them.

    Since a NodeManagerBase can handle more than one session, in principle only one
    NodeManagerBase is needed for each computing node, thus its name.
    """

    __metaclass__ = abc.ABCMeta

    def __init__(self,
                 useDLM=True,
                 dfmsPath=None,
                 host=None,
                 error_listener=None,
                 enable_luigi=False,
                 events_port=constants.NODE_DEFAULT_EVENTS_PORT,
                 rpc_port=constants.NODE_DEFAULT_RPC_PORT,
                 max_threads=0):
        """
        Creates a new node manager.

        :param useDLM: if True, a DataLifecycleManager is created and DROPs
                       are registered with it at deployment time.
        :param dfmsPath: optional directory with user-provided DROP
                         application code; appended to ``sys.path`` if it
                         exists.
        :param host: hostname this manager identifies itself with; defaults
                     to 'localhost'.
        :param error_listener: either an object exposing an ``on_error``
                               method, or the fully-qualified name of a class
                               to import and instantiate.
        :param enable_luigi: forwarded to each created Session.
        :param events_port: port used for inter-node event publication.
        :param rpc_port: port used for inter-node RPC.
        :param max_threads: size of the DROP thread pool; 0 disables the
                            pool, any other value is clamped to [1, 200].

        :raises ValueError: if ``error_listener`` has no ``on_error`` method.
        """
        self._dlm = DataLifecycleManager() if useDLM else None
        self._host = host or 'localhost'
        self._events_port = events_port
        self._rpc_port = rpc_port
        self._sessions = {}

        # dfmsPath contains code added by the user with possible
        # DROP applications
        if dfmsPath:
            dfmsPath = os.path.expanduser(dfmsPath)
            if os.path.isdir(dfmsPath):
                logger.info("Adding %s to the system path", dfmsPath)
                sys.path.append(dfmsPath)

        # Error listener used by users to deal with errors coming from specific
        # Drops in whatever way they want
        if error_listener:
            if isinstance(error_listener, six.string_types):
                try:
                    parts = error_listener.split('.')
                    module = importlib.import_module('.'.join(parts[:-1]))
                except Exception:
                    # Narrowed from a bare ``except:`` so SystemExit and
                    # KeyboardInterrupt are not logged as listener errors
                    logger.exception('Creating the error listener')
                    raise
                error_listener = getattr(module, parts[-1])()
            if not hasattr(error_listener, 'on_error'):
                raise ValueError(
                    "error_listener doesn't contain an on_error method")
        self._error_listener = error_listener

        self._enable_luigi = enable_luigi

        # Start our thread pool
        if max_threads == 0:
            self._threadpool = None
        else:
            # Clamp to a sane range to avoid an unbounded number of threads
            max_threads = max(min(max_threads, 200), 1)
            logger.info("Initializing thread pool with %d threads",
                        max_threads)
            self._threadpool = multiprocessing.pool.ThreadPool(
                processes=max_threads)

        # Event handler that only logs status changes
        debugging = logger.isEnabledFor(logging.DEBUG)
        self._logging_event_listener = LogEvtListener() if debugging else None

        # Start the mix-ins
        self.start()

    @abc.abstractmethod
    def start(self):
        """
        Starts any background task required by this Node Manager
        """

    @abc.abstractmethod
    def shutdown(self):
        """
        Stops any pending background task run by this Node Manager
        """

    @abc.abstractmethod
    def subscribe(self, host, port):
        """
        Subscribes this Node Manager to events published in from ``host``:``port``
        """

    @abc.abstractmethod
    def publish_event(self, evt):
        """
        Publishes the event ``evt`` for other Node Managers to receive it
        """

    @abc.abstractmethod
    def get_rpc_client(self, hostname, port):
        """
        Creates an RPC client connected to the node manager running in
        ``host``:``port``, and its closing method, as a 2-tuple.
        """

    def deliver_event(self, evt):
        """
        Method called by subclasses when a new event has arrived through the
        subscription mechanism.
        """
        if evt.session_id not in self._sessions:
            # Lazy %-args so the message is only formatted when emitted
            logger.warning("No session %s found, event will be dropped",
                           evt.session_id)
            return
        self._sessions[evt.session_id].deliver_event(evt)

    def _check_session_id(self, session_id):
        """Raises NoSessionException if ``session_id`` is unknown."""
        if session_id not in self._sessions:
            raise NoSessionException(session_id)

    def createSession(self, sessionId):
        """
        Creates a new session identified by ``sessionId``.

        :raises SessionAlreadyExistsException: if a session with that ID
                                               already exists.
        """
        if sessionId in self._sessions:
            raise SessionAlreadyExistsException(sessionId)
        self._sessions[sessionId] = Session(sessionId, self._host,
                                            self._error_listener,
                                            self._enable_luigi)
        logger.info('Created session %s', sessionId)

    def getSessionStatus(self, sessionId):
        """Returns the status of session ``sessionId``."""
        self._check_session_id(sessionId)
        return self._sessions[sessionId].status

    def linkGraphParts(self, sessionId, lhOID, rhOID, linkType):
        """Links two graph parts within session ``sessionId``."""
        self._check_session_id(sessionId)
        self._sessions[sessionId].linkGraphParts(lhOID, rhOID, linkType)

    def addGraphSpec(self, sessionId, graphSpec):
        """Adds a graph specification to session ``sessionId``."""
        self._check_session_id(sessionId)
        self._sessions[sessionId].addGraphSpec(graphSpec)

    def getGraphStatus(self, sessionId):
        """Returns the status of the graph of session ``sessionId``."""
        self._check_session_id(sessionId)
        return self._sessions[sessionId].getGraphStatus()

    def getGraph(self, sessionId):
        """Returns the graph of session ``sessionId``."""
        self._check_session_id(sessionId)
        return self._sessions[sessionId].getGraph()

    def deploySession(self, sessionId, completedDrops=None):
        """
        Deploys session ``sessionId``, wiring each DROP into the thread pool,
        the DLM and the event listeners.

        :param completedDrops: optional collection of DROP UIDs to be moved
                               directly to COMPLETED upon deployment.
        """
        # Default changed from a mutable ``[]`` (shared across calls) to None
        if completedDrops is None:
            completedDrops = []
        self._check_session_id(sessionId)
        session = self._sessions[sessionId]

        def foreach(drop):
            if self._threadpool is not None:
                drop._tp = self._threadpool
            if self._dlm:
                self._dlm.addDrop(drop)

            # Remote event forwarding
            evt_listener = NMDropEventListener(self, sessionId)
            if isinstance(drop, AppDROP):
                drop.subscribe(evt_listener, 'producerFinished')
            else:
                drop.subscribe(evt_listener, 'dropCompleted')

            # Purely for logging purposes
            log_evt_listener = self._logging_event_listener
            if log_evt_listener:
                drop.subscribe(log_evt_listener, 'status')
                if isinstance(drop, AppDROP):
                    drop.subscribe(log_evt_listener, 'execStatus')

        session.deploy(completedDrops=completedDrops, foreach=foreach)

    def destroySession(self, sessionId):
        """Removes and destroys session ``sessionId``."""
        self._check_session_id(sessionId)
        session = self._sessions.pop(sessionId)
        session.destroy()

    def getSessionIds(self):
        """Returns the IDs of all sessions held by this manager."""
        return list(self._sessions.keys())

    def getGraphSize(self, sessionId):
        """Returns the number of DROPs in the graph of session ``sessionId``."""
        self._check_session_id(sessionId)
        session = self._sessions[sessionId]
        return len(session._graph)

    def trigger_drops(self, sessionId, uids):
        """Asynchronously triggers the DROPs ``uids`` in session ``sessionId``."""
        self._check_session_id(sessionId)
        t = threading.Thread(target=self._sessions[sessionId].trigger_drops,
                             name="Drop trigger",
                             args=(uids, ))
        t.start()

    def add_node_subscriptions(self, sessionId, relationships):
        """
        Records inter-node DROP relationships for session ``sessionId`` and
        subscribes this manager to the event channels of the involved nodes.
        """
        logger.debug("Received subscription information: %r", relationships)
        self._check_session_id(sessionId)
        self._sessions[sessionId].add_node_subscriptions(
            sessionId, relationships, self)

        # Set up event channels subscriptions
        for nodesub in relationships:

            host = nodesub
            events_port = constants.NODE_DEFAULT_EVENTS_PORT
            if type(nodesub) is tuple:
                host, events_port, _ = nodesub

            # TODO: we also have to unsubscribe from them at some point
            self.subscribe(host, events_port)

    def get_drop_attribute(self, hostname, port, session_id, uid, name):
        """
        Returns attribute ``name`` of the remote DROP ``uid`` living in
        session ``session_id`` at ``hostname``:``port``. Methods are returned
        as remote callables that close the RPC client when garbage-collected;
        plain properties are fetched eagerly and the client is closed here.
        """
        logger.debug("Getting attribute %s for drop %s of session %s at %s:%d",
                     name, uid, session_id, hostname, port)

        client, closer = self.get_rpc_client(hostname, port)

        # The remote method receives the same client used to inspect the remote
        # object, and it closes it when the method is not used anymore
        class remote_method(object):
            def __del__(self):
                closer()

            def __call__(self, *args):
                return client.call_drop(session_id, uid, name, *args)

        # Shortcut to avoid extra calls
        known_methods = ()
        #known_methods = ('open', 'read', 'write', 'close')
        closeit = False
        try:
            if name in known_methods or client.has_method(
                    session_id, uid, name):
                return remote_method()
            closeit = True
            return client.get_drop_property(session_id, uid, name)
        finally:
            if closeit:
                closer()

    def has_method(self, sessionId, uid, mname):
        """Returns True if DROP ``uid`` in ``sessionId`` has method ``mname``."""
        self._check_session_id(sessionId)
        return self._sessions[sessionId].has_method(uid, mname)

    def get_drop_property(self, sessionId, uuid, prop_name):
        """Returns property ``prop_name`` of DROP ``uuid`` in ``sessionId``."""
        self._check_session_id(sessionId)
        return self._sessions[sessionId].get_drop_property(uuid, prop_name)

    def call_drop(self, sessionId, uid, method, *args):
        """Invokes ``method(*args)`` on DROP ``uid`` in ``sessionId``."""
        self._check_session_id(sessionId)
        return self._sessions[sessionId].call_drop(uid, method, *args)
Example #4
0
class NodeManager(DROPManager):
    """
    A DROPManager that creates and holds references to DROPs.

    A NodeManager is the ultimate responsible of handling DROPs. It does so not
    directly, but via Sessions, which represent and encapsulate separate,
    independent DROP graph executions. All DROPs created by the
    different Sessions are also given to a common DataLifecycleManager, which
    takes care of expiring them when needed and replicating them.

    Since a NodeManager can handle more than one session, in principle only one
    NodeManager is needed for each computing node, thus its name.
    """

    def __init__(self, useDLM=True, dfmsPath=None, host=None, error_listener=None, enable_luigi=False):
        """
        Creates a new NodeManager.

        :param useDLM: if True, a DataLifecycleManager is created and new
                       DROPs are registered with it at deployment time.
        :param dfmsPath: optional directory with user-provided DROP
                         application code; appended to ``sys.path`` if it
                         exists.
        :param host: hostname forwarded to each created Session.
        :param error_listener: either an object exposing an ``on_error``
                               method, or the fully-qualified name of a class
                               to import and instantiate.
        :param enable_luigi: forwarded to each created Session.

        :raises ValueError: if ``error_listener`` has no ``on_error`` method.
        """
        self._dlm = DataLifecycleManager() if useDLM else None
        self._sessions = {}
        self._host = host

        # dfmsPath contains code added by the user with possible
        # DROP applications
        if dfmsPath:
            dfmsPath = os.path.expanduser(dfmsPath)
            if os.path.isdir(dfmsPath):
                # Lazy %-args: logging formats only when INFO is enabled,
                # so no explicit isEnabledFor guard is needed
                logger.info("Adding %s to the system path", dfmsPath)
                sys.path.append(dfmsPath)

        # Error listener used by users to deal with errors coming from specific
        # Drops in whatever way they want
        if error_listener:
            if isinstance(error_listener, basestring):
                try:
                    parts = error_listener.split(".")
                    module = importlib.import_module(".".join(parts[:-1]))
                except Exception:
                    # Narrowed from a bare ``except:`` so SystemExit and
                    # KeyboardInterrupt are not logged as listener errors
                    logger.exception("Creating the error listener")
                    raise
                error_listener = getattr(module, parts[-1])()
            if not hasattr(error_listener, "on_error"):
                raise ValueError("error_listener doesn't contain an on_error method")
        self._error_listener = error_listener

        self._enable_luigi = enable_luigi

    def createSession(self, sessionId):
        """
        Creates a new session identified by ``sessionId``.

        :raises Exception: if a session with that ID already exists.
        """
        if sessionId in self._sessions:
            raise Exception("A session already exists for sessionId %s" % (str(sessionId)))
        self._sessions[sessionId] = Session(sessionId, self._host, self._error_listener, self._enable_luigi)
        logger.info("Created session %s", sessionId)

    def getSessionStatus(self, sessionId):
        """Returns the status of session ``sessionId``."""
        return self._sessions[sessionId].status

    def quickDeploy(self, sessionId, graphSpec):
        """Creates, populates and deploys a session in one call."""
        self.createSession(sessionId)
        self.addGraphSpec(sessionId, graphSpec)
        return self.deploySession(sessionId)

    def linkGraphParts(self, sessionId, lhOID, rhOID, linkType):
        """Links two graph parts within session ``sessionId``."""
        self._sessions[sessionId].linkGraphParts(lhOID, rhOID, linkType)

    def addGraphSpec(self, sessionId, graphSpec):
        """Adds a graph specification to session ``sessionId``."""
        self._sessions[sessionId].addGraphSpec(graphSpec)

    def getGraphStatus(self, sessionId):
        """Returns the status of the graph of session ``sessionId``."""
        return self._sessions[sessionId].getGraphStatus()

    def getGraph(self, sessionId):
        """Returns the graph of session ``sessionId``."""
        return self._sessions[sessionId].getGraph()

    def deploySession(self, sessionId, completedDrops=None):
        """
        Deploys session ``sessionId`` and returns a dictionary mapping each
        DROP's UID to its Pyro URI.

        :param completedDrops: optional collection of DROP UIDs to be moved
                               directly to COMPLETED upon deployment.
        """
        # Default changed from a mutable ``[]`` (shared across calls) to None
        if completedDrops is None:
            completedDrops = []
        session = self._sessions[sessionId]
        session.deploy(completedDrops=completedDrops)
        roots = session.roots

        # We register the new DROPs with the DLM if there is one
        if self._dlm:
            logger.debug("Registering new DROPs with the DataLifecycleManager")
            droputils.breadFirstTraverse(roots, lambda drop: self._dlm.addDrop(drop))

        # Finally, we also collect the Pyro URIs of our DROPs and return them
        uris = {}
        droputils.breadFirstTraverse(roots, lambda drop: uris.__setitem__(drop.uid, drop.uri))
        return uris

    def destroySession(self, sessionId):
        """Removes and destroys session ``sessionId``."""
        session = self._sessions.pop(sessionId)
        session.destroy()

    def getSessionIds(self):
        """Returns the IDs of all sessions held by this manager."""
        # Wrapped in list() so the result is a snapshot (and Py3-safe),
        # matching NodeManagerBase.getSessionIds
        return list(self._sessions.keys())

    def getGraphSize(self, sessionId):
        """Returns the number of DROPs in the graph of session ``sessionId``."""
        session = self._sessions[sessionId]
        return len(session._graph)

    def getTemplates(self):
        """Returns the list of graph templates known to this manager."""

        # TODO: we currently have a hardcoded list of functions, but we should
        #       load these repositories in a different way, like in this
        #       commented code
        # tplDir = os.path.expanduser("~/.dfms/templates")
        # if not os.path.isdir(tplDir):
        #    logger.warning('%s directory not found, no templates available' % (tplDir))
        #    return []
        #
        # templates = []
        # for fname in os.listdir(tplDir):
        #    if not  os.path.isfile(fname): continue
        #    if fname[-3:] != '.py': continue
        #
        #    with open(fname) as f:
        #        m = imp.load_module(fname[-3:], f, fname)
        #        functions = m.list_templates()
        #        for f in functions:
        #            templates.append(_functionAsTemplate(f))

        templates = []
        for f in repository.complex_graph, repository.pip_cont_img_pg, repository.archiving_app:
            templates.append(_functionAsTemplate(f))
        return templates

    def materializeTemplate(self, tpl, sessionId, **tplParams):
        """
        Instantiates template ``tpl`` with ``tplParams`` and adds the
        resulting graph spec to session ``sessionId``.
        """
        # tpl currently has the form <full.mod.path.functionName>
        parts = tpl.split(".")
        module = importlib.import_module(".".join(parts[:-1]))
        tplFunction = getattr(module, parts[-1])

        # invoke the template function with the given parameters
        # and add the new graph spec to the session
        graphSpec = tplFunction(**tplParams)
        self.addGraphSpec(sessionId, graphSpec)

        logger.info("Added graph from template %s to session %s with params: %s",
                    tpl, sessionId, tplParams)