Example #1
    def _test_graph(self, pgCreator, socketListeners=1):
        if isinstance(pgCreator, six.string_types):
            pgCreator = "test.graphsRepository.%s" % (pgCreator)
        task = FinishGraphExecution(pgCreator=pgCreator)
        sch = scheduler.CentralPlannerScheduler()
        w = worker.Worker(scheduler=sch)
        w.add(task)

        # Start executing the SocketListenerApps so they open their ports
        for drop, _ in droputils.breadFirstTraverse(task.roots):
            if isinstance(drop, SocketListenerApp):
                threading.Thread(target=lambda drop: drop.execute(),
                                 args=(drop, )).start()

        # Write to the initial nodes of the graph to trigger the graph execution
        for i in range(socketListeners):
            threading.Thread(target=utils.write_to,
                             name='socketWriter',
                             args=("localhost", 1111 + i, test_data,
                                   2)).start()

        # Run the graph! Luigi will either monitor or execute the DROPs
        w.run()
        w.stop()

        # ... but at the end all the nodes of the graph should be completed
        # and should exist
        for drop, _ in droputils.breadFirstTraverse(task.roots):
            self.assertTrue(
                drop.isCompleted() and drop.exists(),
                "%s is not COMPLETED or doesn't exist" % (drop.uid))
Example #2
    def create_worker(scheduler, worker_processes, assistant=False):
        """Creates a worker

        :returns: The worker
        :rtype: :class:`worker.Worker`
        """
        return worker.Worker(scheduler=scheduler,
                             worker_processes=worker_processes,
                             assistant=assistant)
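A hedged sketch of how a factory like this is typically called; the scheduler instance, process count, and MyTask are placeholders, not taken from the example:

# Hypothetical call site for the factory above; MyTask and the argument
# values are illustrative only.
sch = scheduler.CentralPlannerScheduler()
w = create_worker(sch, worker_processes=1, assistant=False)
w.add(MyTask())
w.run()
w.stop()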
Example #3
def run_task(task):
    worker_num = int(util.luigi_workers)
    # multiprocess = worker_num > 1

    w = worker.Worker(scheduler=scheduler,
                      no_install_shutdown_handler=True,
                      worker_processes=worker_num)
    w.add(task, multiprocess=True, processes=2)
    w.run()
Example #4
def trigger_pipeline():
    '''
    run the pipeline
    '''
    doc_dir = 'bcube_demo/docs'

    # capture the main luigi output
    ste = sys.stderr
    sys.stderr = pipeline_output = StringIO.StringIO()
    std = sys.stdout
    sys.stdout = pipeline_debug = StringIO.StringIO()

    pull_from_solr(doc_dir)
    task = MainWorkflow(doc_dir=doc_dir, yaml_file='configs/bcube_demo.yaml')
    luigi.interface.setup_interface_logging()
    sch = luigi.scheduler.CentralPlannerScheduler()
    w = worker.Worker(scheduler=sch)
    w.add(task)
    w.run()

    # store and reset
    piped = pipeline_output.getvalue()
    sys.stderr = ste

    debugs = pipeline_debug.getvalue()
    sys.stdout = std

    # # fake the generator
    # def generate():
    #     for pipe in piped.split('\n'):
    #         yield pipe + '\n'
    #     yield '\n\n####################\n\n'

    #     for debug in debugs.split('\n'):
    #         yield debug + '\n'

    # return Response(generate(), mimetype='text/plain')

    def generate_urn():
        for urn in glob.glob('bcube_demo/triples/*.txt'):
            with open(urn, 'r') as f:
                u = f.read().strip()
            yield u + '\n'

    return Response(generate_urn(), mimetype='text/plain')
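This example is Python 2 code (StringIO.StringIO); on Python 3 the same stdout/stderr capture can be written with io.StringIO and contextlib. A sketch, with run_pipeline() standing in for the Luigi worker setup above:

import contextlib
import io

pipeline_output = io.StringIO()
pipeline_debug = io.StringIO()
# Capture stderr (Luigi's main output) and stdout (debug output) separately.
with contextlib.redirect_stderr(pipeline_output), \
        contextlib.redirect_stdout(pipeline_debug):
    run_pipeline()  # hypothetical stand-in for the task/scheduler/worker calls

piped = pipeline_output.getvalue()
debugs = pipeline_debug.getvalue()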
Example #5
    def create_worker(self, scheduler, worker_processes, assistant=False):
        return worker.Worker(
            scheduler=scheduler,
            worker_processes=worker_processes,
            assistant=assistant
        )
Example #6
    def deploy(self, completedDrops=[], foreach=None):
        """
        Creates the DROPs represented by all the graph specs contained in
        this session, effectively deploying them.

        When this method has finished executing, a Pyro Daemon will also be
        up and running, servicing requests to access all the DROPs
        belonging to this session.
        """

        status = self.status
        if status != SessionStates.BUILDING:
            raise InvalidSessionState("Can't deploy this session in its current status: %d" % (status))

        self.status = SessionStates.DEPLOYING

        # Create the real DROPs from the graph specs
        logger.info("Creating DROPs for session %s", self._sessionId)

        self._roots = graph_loader.createGraphFromDropSpecList(self._graph.values())
        logger.info("%d drops successfully created", len(self._graph))

        for drop, _ in droputils.breadFirstTraverse(self._roots):

            # Register them
            self._drops[drop.uid] = drop

            # Register them with the error handler
            if self._error_status_listener:
                drop.subscribe(self._error_status_listener, eventType='status')
        logger.info("Stored all drops, proceeding with further customization")

        # Start the luigi task that will make sure the graph is executed.
        # If we're not using luigi we still need to know when the graph has
        # finished executing, so we subscribe a completion listener to the
        # leaf drops instead
        if self._enable_luigi:
            logger.debug("Starting Luigi FinishGraphExecution task for session %s", self._sessionId)
            task = luigi_int.FinishGraphExecution(self._sessionId, self._roots)
            sch = scheduler.CentralPlannerScheduler()
            w = worker.Worker(scheduler=sch)
            w.add(task)
            workerT = threading.Thread(None, self._run, args=[w])
            workerT.daemon = True
            workerT.start()
        else:
            leaves = droputils.getLeafNodes(self._roots)
            logger.info("Adding completion listener to leaf drops")
            listener = LeavesCompletionListener(leaves, self)
            for leaf in leaves:
                if isinstance(leaf, AppDROP):
                    leaf.subscribe(listener, 'producerFinished')
                else:
                    leaf.subscribe(listener, 'dropCompleted')
            logger.info("Listener added to leaf drops")

        # We move to COMPLETED the DROPs that we were requested to.
        # InputFiredAppDROPs are considered here as having to be executed,
        # and are therefore not directly moved to COMPLETED.
        #
        # This is done in a separate iteration at the very end to make sure
        # all event listeners are ready before any drop is triggered.
        self.trigger_drops(completedDrops)

        # Foreach
        if foreach:
            logger.info("Invoking 'foreach' on each drop")
            for drop, _ in droputils.breadFirstTraverse(self._roots):
                foreach(drop)
            logger.info("'foreach' invoked for each drop")

        # Append proxies
        logger.info("Creating %d drop proxies", len(self._proxyinfo))
        for nm, host, port, local_uid, relname, remote_uid in self._proxyinfo:
            proxy = DropProxy(nm, host, port, self._sessionId, remote_uid)
            method = getattr(self._drops[local_uid], relname)
            method(proxy, False)

        self.status = SessionStates.RUNNING
        logger.info("Session %s is now RUNNING", self._sessionId)
Example #7
    def run(self):
        os.system('touch "%s"' % self.output().path)


class DepTask(DependencyTriggeredTask):
    def requires(self):
        return [SimpleTask()]

    def output(self):
        return luigi.LocalTarget('/tmp/b-created')

    def run(self):
        os.system('touch "%s"' % self.output().path)


if __name__ == '__main__':
    os.system('rm -f /tmp/a-created /tmp/b-created')
    interface.setup_interface_logging()
    sch = scheduler.CentralPlannerScheduler()
    w = worker.Worker(scheduler=sch)
    w.add(SimpleTask())
    w.run()
    w.add(DepTask())
    w.run()
    os.system('rm -f /tmp/a-created /tmp/b-created')
    w.add(DepTask())
    w.run()
    os.system('rm -f /tmp/a-created')
    w.add(DepTask())
    w.run()
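The snippet starts inside what appears to be a SimpleTask class whose head (and the module imports) was cut off. Judging from the /tmp/a-created path cleaned up in __main__ and the parallel structure of DepTask, the missing part plausibly looks like this; it is a reconstruction, not the original source, and DependencyTriggeredTask's import is not shown in the excerpt:

import os

import luigi
from luigi import interface, scheduler, worker


class SimpleTask(luigi.Task):
    # Reconstructed: writes the /tmp/a-created marker removed in __main__.
    def output(self):
        return luigi.LocalTarget('/tmp/a-created')

The run() method shown at the top of the example would then be the last method of this class.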
Example #8
    def create_worker(self, scheduler, worker_processes):
        return worker.Worker(
            scheduler=scheduler, worker_processes=worker_processes)