def _test_graph(self, pgCreator, socketListeners=1):
    """
    Builds the graph produced by ``pgCreator``, feeds its socket listeners,
    runs it under a local luigi worker, and asserts every DROP finished.

    :param pgCreator: a callable creating a physical graph, or the name of
                      a function in ``test.graphsRepository`` (Python 2:
                      ``basestring`` covers both str and unicode).
    :param socketListeners: number of SocketListenerApps in the graph that
                            must be fed data before execution can start.
    """
    if isinstance(pgCreator, basestring):
        pgCreator = "test.graphsRepository.%s" % (pgCreator)
    task = FinishGraphExecution(pgCreator=pgCreator)
    sch = scheduler.CentralPlannerScheduler()
    w = worker.Worker(scheduler=sch)
    w.add(task)

    # Start executing the SocketListenerApps so they open their ports
    # (each one runs in its own daemonless thread; execute() blocks until
    # its socket receives data)
    def startSocketListeners(drop):
        if isinstance(drop, SocketListenerApp):
            threading.Thread(target=lambda drop: drop.execute(), args=(drop,)).start()
    droputils.breadFirstTraverse(task.roots, startSocketListeners)

    # Write to the initial nodes of the graph to trigger the graph execution
    # NOTE(review): ports are assumed to be 1111, 1112, ... matching the
    # listeners created by the graph -- confirm against graphsRepository
    for i in xrange(socketListeners):
        threading.Thread(
            target=utils.writeToRemotePort,
            name="socketWriter",
            args=("localhost", 1111 + i, test_data, 2)
        ).start()

    # Run the graph! Luigi will either monitor or execute the DROPs
    w.run()
    w.stop()

    # ... but at the end all the nodes of the graph should be completed
    # and should exist
    droputils.breadFirstTraverse(
        task.roots,
        lambda drop: self.assertTrue(
            drop.isCompleted() and drop.exists(),
            "%s is not COMPLETED or doesn't exist" % (drop.uid)
        ),
    )
def _test_graph(self, pgCreator, socketListeners=1):
    """
    Builds the graph produced by ``pgCreator``, feeds its socket
    listeners, runs it under a local luigi worker, and then checks that
    every DROP in the graph completed and exists.
    """
    # A bare name refers to a creator function in test.graphsRepository
    if isinstance(pgCreator, six.string_types):
        pgCreator = "test.graphsRepository.%s" % (pgCreator)

    finish_task = FinishGraphExecution(pgCreator=pgCreator)
    central_scheduler = scheduler.CentralPlannerScheduler()
    luigi_worker = worker.Worker(scheduler=central_scheduler)
    luigi_worker.add(finish_task)

    # Kick off every SocketListenerApp in its own thread so each one opens
    # its port and waits for incoming data
    for drop, _ in droputils.breadFirstTraverse(finish_task.roots):
        if not isinstance(drop, SocketListenerApp):
            continue
        threading.Thread(target=lambda drop: drop.execute(), args=(drop, )).start()

    # Feed the listeners to trigger graph execution, one writer per port
    for offset in range(socketListeners):
        writer = threading.Thread(target=utils.write_to, name='socketWriter',
                                  args=("localhost", 1111 + offset, test_data, 2))
        writer.start()

    # Run the graph! Luigi will either monitor or execute the DROPs
    luigi_worker.run()
    luigi_worker.stop()

    # Afterwards every node of the graph must be COMPLETED and must exist
    for drop, _ in droputils.breadFirstTraverse(finish_task.roots):
        completed_and_present = drop.isCompleted() and drop.exists()
        self.assertTrue(completed_and_present,
                        "%s is not COMPLETED or doesn't exist" % (drop.uid))
def testBreadthFirstSearch(self):
    """
    Checks that our BFS method is correct: traversing from the single
    root ``a`` must visit the ten graph nodes in breadth-first order.
    """
    a, b, c, d, e, f, g, h, i, j = self._createGraph()
    nodesList = []
    # Pass the bound append directly instead of wrapping it in a lambda
    droputils.breadFirstTraverse(a, nodesList.append)
    self.assertListEqual(nodesList, [a, b, c, d, e, f, g, h, i, j])
    # (removed a dead trailing `pass` statement)
def test_BFSWithFiltering(self):
    """
    Checks that the BFS works if the given function does filtering on the
    downstream DROPs: pruning 'b' and 'f' must leave exactly 5 visited
    nodes, in BFS order.
    """
    a, _, c, _, e, _, _, h, _, j = self._createGraph()
    visitedNodes = []

    def filtering(drop, downStreamDrops):
        # In-place slice assignment so the traversal sees the pruned list
        downStreamDrops[:] = [x for x in downStreamDrops if x.uid not in ('b', 'f')]
        visitedNodes.append(drop)

    droputils.breadFirstTraverse(a, filtering)
    # assertEqual: assertEquals is a deprecated alias removed in Python 3.12
    self.assertEqual(5, len(visitedNodes))
    self.assertListEqual(visitedNodes, [a, c, e, h, j])
def deploySession(self, sessionId, completedDrops=None):
    """
    Deploys the session identified by ``sessionId``, optionally moving the
    DROPs listed in ``completedDrops`` to COMPLETED, and returns a mapping
    of DROP uid -> Pyro URI for every DROP in the session.

    :param sessionId: key of a session previously stored in self._sessions.
    :param completedDrops: uids of DROPs to mark as completed on deploy.
    """
    # Avoid the mutable-default-argument pitfall: a shared [] would leak
    # state across calls if a callee ever mutated it
    if completedDrops is None:
        completedDrops = []

    session = self._sessions[sessionId]
    session.deploy(completedDrops=completedDrops)
    roots = session.roots

    # We register the new DROPs with the DLM if there is one
    if self._dlm:
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Registering new DROPs with the DataLifecycleManager")
        droputils.breadFirstTraverse(roots, lambda drop: self._dlm.addDrop(drop))

    # Finally, we also collect the Pyro URIs of our DROPs and return them
    uris = {}
    droputils.breadFirstTraverse(roots, lambda drop: uris.__setitem__(drop.uid, drop.uri))
    return uris
def testBreadthFirstSearch(self):
    """ Checks that our BFS method is correct """
    a, b, c, d, e, f, g, h, i, j = self._createGraph()
    # Collect the visited drops one by one rather than via a comprehension
    visited = []
    for node, _ in droputils.breadFirstTraverse(a):
        visited.append(node)
    self.assertListEqual([a, b, c, d, e, f, g, h, i, j], visited)
def trigger_drops(self, uids):
    """
    Walks the session's graph and triggers every DROP whose uid appears in
    ``uids``: app drops are executed asynchronously, all others are moved
    to COMPLETED. Non-AbstractDROP downstream entries (remote proxies) are
    pruned so the traversal stays within this manager's drops.
    """
    for node, downstream in droputils.breadFirstTraverse(self._roots):
        # Prune proxies in place so breadFirstTraverse doesn't follow them
        downstream[:] = [d for d in downstream if isinstance(d, AbstractDROP)]
        if node.uid not in uids:
            continue
        if isinstance(node, InputFiredAppDROP):
            node.async_execute()
        else:
            node.setCompleted()
def getGraphStatus(self):
    """
    Returns a dict mapping each local DROP's oid to its status (and, for
    app drops, its execution status). Only valid while the session is
    RUNNING or FINISHED.
    """
    if self.status not in (SessionStates.RUNNING, SessionStates.FINISHED):
        raise Exception("The session is currently not running, cannot get graph status")

    statusDict = collections.defaultdict(dict)

    # We shouldn't traverse the full graph because there might be nodes
    # attached to our DROPs that are actually part of other DM (and have been
    # wired together by the DIM after deploying each individual graph on
    # each of the DMs).
    # We recognize such nodes because they are actually not an instance of
    # AbstractDROP (they are Pyro4.Proxy instances).
    #
    # The same trick is used in luigi_int.RunDROPTask.requires
    def addToDict(drop, downStreamDrops):
        # Prune remote proxies in place so the BFS stays local
        local = [d for d in downStreamDrops if isinstance(d, AbstractDROP)]
        downStreamDrops[:] = local
        entry = statusDict[drop.oid]
        if isinstance(drop, AppDROP):
            entry['execStatus'] = drop.execStatus
        entry['status'] = drop.status

    droputils.breadFirstTraverse(self._roots, addToDict)
    return statusDict
def test_BFSWithFiltering(self):
    """
    Checks that the BFS works if the given function does filtering on the
    downstream DROPs.
    """
    a, _, c, _, e, _, _, h, _, j = self._createGraph()

    seen = []
    for node, downstream in droputils.breadFirstTraverse(a):
        # Prune 'b' and 'f' in place so the traversal never reaches them
        kept = [d for d in downstream if d.uid not in ('b', 'f')]
        downstream[:] = kept
        seen.append(node)

    self.assertEqual(5, len(seen))
    self.assertListEqual(seen, [a, c, e, h, j])
def getGraphStatus(self):
    """
    Returns a dict mapping each local DROP's oid to its status (and, for
    app drops, its execution status). Only valid while the session is
    RUNNING or FINISHED.
    """
    if self.status not in (SessionStates.RUNNING, SessionStates.FINISHED):
        raise InvalidSessionState("The session is currently not running, cannot get graph status")

    # We shouldn't traverse the full graph because there might be nodes
    # attached to our DROPs that are actually part of other DM (and have been
    # wired together by the DIM after deploying each individual graph on
    # each of the DMs).
    # We recognize such nodes because they are actually not an instance of
    # AbstractDROP (they are DropProxy instances).
    #
    # The same trick is used in luigi_int.RunDROPTask.requires
    statusDict = collections.defaultdict(dict)
    for node, downstream in droputils.breadFirstTraverse(self._roots):
        # Prune remote proxies in place so the BFS stays local
        downstream[:] = [d for d in downstream if isinstance(d, AbstractDROP)]
        entry = statusDict[node.oid]
        if isinstance(node, AppDROP):
            entry['execStatus'] = node.execStatus
        entry['status'] = node.status
    return statusDict
def deploy(self, completedDrops=None, foreach=None):
    """
    Creates the DROPs represented by all the graph specs contained in
    this session, effectively deploying them.

    When this method has finished executing a Pyro Daemon will also be
    up and running, servicing requests to access to all the DROPs
    belonging to this session

    :param completedDrops: uids of DROPs to trigger once everything is
                           wired up (apps are executed, data drops are
                           moved to COMPLETED).
    :param foreach: optional callable invoked once per created drop.
    :raises InvalidSessionState: if the session is not in BUILDING state.
    """
    # Avoid the mutable-default-argument pitfall: a shared [] default
    # would be the same object across every call to deploy()
    if completedDrops is None:
        completedDrops = []

    status = self.status
    if status != SessionStates.BUILDING:
        raise InvalidSessionState("Can't deploy this session in its current status: %d" % (status))

    self.status = SessionStates.DEPLOYING

    # Create the real DROPs from the graph specs
    logger.info("Creating DROPs for session %s", self._sessionId)
    self._roots = graph_loader.createGraphFromDropSpecList(self._graph.values())
    logger.info("%d drops successfully created", len(self._graph))

    for drop, _ in droputils.breadFirstTraverse(self._roots):
        # Register them
        self._drops[drop.uid] = drop
        # Register them with the error handler
        if self._error_status_listener:
            drop.subscribe(self._error_status_listener, eventType='status')

    logger.info("Stored all drops, proceeding with further customization")

    # Start the luigi task that will make sure the graph is executed
    # If we're not using luigi we still
    if self._enable_luigi:
        logger.debug("Starting Luigi FinishGraphExecution task for session %s", self._sessionId)
        task = luigi_int.FinishGraphExecution(self._sessionId, self._roots)
        sch = scheduler.CentralPlannerScheduler()
        w = worker.Worker(scheduler=sch)
        w.add(task)
        workerT = threading.Thread(None, self._run, args=[w])
        workerT.daemon = True
        workerT.start()
    else:
        leaves = droputils.getLeafNodes(self._roots)
        logger.info("Adding completion listener to leaf drops")
        listener = LeavesCompletionListener(leaves, self)
        for leaf in leaves:
            if isinstance(leaf, AppDROP):
                leaf.subscribe(listener, 'producerFinished')
            else:
                leaf.subscribe(listener, 'dropCompleted')
        logger.info("Listener added to leaf drops")

    # We move to COMPLETED the DROPs that we were requested to
    # InputFiredAppDROP are here considered as having to be executed and
    # not directly moved to COMPLETED.
    #
    # This is done in a separate iteration at the very end because all drops
    # to make sure all event listeners are ready
    self.trigger_drops(completedDrops)

    # Foreach
    if foreach:
        logger.info("Invoking 'foreach' on each drop")
        for drop, _ in droputils.breadFirstTraverse(self._roots):
            foreach(drop)
        logger.info("'foreach' invoked for each drop")

    # Append proxies
    logger.info("Creating %d drop proxies", len(self._proxyinfo))
    for nm, host, port, local_uid, relname, remote_uid in self._proxyinfo:
        proxy = DropProxy(nm, host, port, self._sessionId, remote_uid)
        method = getattr(self._drops[local_uid], relname)
        method(proxy, False)

    self.status = SessionStates.RUNNING
    logger.info("Session %s is now RUNNING", self._sessionId)
def deploy(self, completedDrops=None):
    """
    Creates the DROPs represented by all the graph specs contained in
    this session, effectively deploying them.

    When this method has finished executing a Pyro Daemon will also be
    up and running, servicing requests to access to all the DROPs
    belonging to this session

    :param completedDrops: uids of DROPs to trigger once everything is
                           wired up (InputFiredAppDROPs are executed,
                           other drops are moved to COMPLETED).
    :raises Exception: if the session is not in BUILDING state.
    """
    # Avoid the mutable-default-argument pitfall: a shared [] default
    # would be the same object across every call to deploy()
    if completedDrops is None:
        completedDrops = []

    status = self.status
    if status != SessionStates.BUILDING:
        raise Exception("Can't deploy this session in its current status: %d" % (status))

    self.status = SessionStates.DEPLOYING

    # Create the Pyro daemon that will serve the DROP proxies and start it
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Starting Pyro4 Daemon for session %s" % (self._sessionId))
    self._daemon = Pyro4.Daemon(host=self._host)
    self._daemonT = threading.Thread(target = lambda: self._daemon.requestLoop(), name="Session %s Pyro Daemon" % (self._sessionId))
    self._daemonT.daemon = True
    self._daemonT.start()

    # Create the real DROPs from the graph specs
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Creating DROPs for session %s" % (self._sessionId))
    self._roots = graph_loader.createGraphFromDropSpecList(self._graph.values())

    # Register them
    droputils.breadFirstTraverse(self._roots, self._registerDrop)

    # Register them with the error handler
    # TODO: We should probably merge all these breadFirstTraverse calls into
    # a single one to avoid so much iteration through the drops
    if self._error_status_listener:
        def register_error_status_listener(drop):
            drop.subscribe(self._error_status_listener, eventType='status')
        droputils.breadFirstTraverse(self._roots, register_error_status_listener)

    # We move to COMPLETED the DROPs that we were requested to
    # InputFiredAppDROP are here considered as having to be executed and
    # not directly moved to COMPLETED.
    # TODO: We should possibly unify this initial triggering into a more
    # solid concept that encompasses these two and other types of DROPs
    def triggerDrop(drop):
        if drop.uid in completedDrops:
            if isinstance(drop, InputFiredAppDROP):
                t = threading.Thread(target=lambda:drop.execute())
                t.daemon = True
                t.start()
            else:
                drop.setCompleted()
    droputils.breadFirstTraverse(self._roots, triggerDrop)

    # Start the luigi task that will make sure the graph is executed
    # If we're not using luigi we still
    if self._enable_luigi:
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("Starting Luigi FinishGraphExecution task for session %s" % (self._sessionId))
        task = luigi_int.FinishGraphExecution(self._sessionId, self._roots)
        sch = scheduler.CentralPlannerScheduler()
        w = worker.Worker(scheduler=sch)
        w.add(task)
        workerT = threading.Thread(None, self._run, args=[w])
        workerT.daemon = True
        workerT.start()
    else:
        leaves = droputils.getLeafNodes(self._roots)
        logger.debug("Adding completion listener to leaf drops %r", leaves)
        listener = LeavesCompletionListener(leaves, self)
        for leaf in leaves:
            leaf.subscribe(listener, 'dropCompleted')
            leaf.subscribe(listener, 'producerFinished')

    self.status = SessionStates.RUNNING
    if logger.isEnabledFor(logging.INFO):
        logger.info("Session %s is now RUNNING" % (self._sessionId))