Пример #1
0
    def start(self):
        """
        Launch the worker processes, then serve control commands.

        One PULL socket connected to ``self.task_server_url`` is created per
        worker and handed to a child process running ``worker``; the sockets
        (each carrying its process as ``.proc``) are stored in
        ``self.workers``. Afterwards a REP socket bound to ``self.ctrl_url``
        answers 'getpid', 'get_num_workers' and 'get_executing' requests
        until a 'stop' or 'kill' command arrives. The ``self.executing``
        directory is removed once the control loop is over.
        """
        proc_title = 'oq-zworkerpool %s' % self.ctrl_url[6:]  # strip tcp://
        print('Starting ' + proc_title, file=sys.stderr)
        setproctitle(proc_title)

        # spawn one process per worker, each with its own PULL socket
        self.workers = []
        for _ in range(self.num_workers):
            pull = z.Socket(self.task_server_url, z.zmq.PULL, 'connect')
            pull.proc = multiprocessing.Process(
                target=worker, args=(pull, self.executing))
            pull.proc.start()
            self.workers.append(pull)

        # control loop: queries are answered, 'stop' and 'kill' end the loop
        with z.Socket(self.ctrl_url, z.zmq.REP, 'bind') as ctrlsock:
            for cmd in ctrlsock:
                if cmd == 'getpid':
                    ctrlsock.send(self.proc.pid)
                elif cmd == 'get_num_workers':
                    ctrlsock.send(self.num_workers)
                elif cmd == 'get_executing':
                    names = sorted(os.listdir(self.executing))
                    ctrlsock.send(' '.join(names))
                elif cmd in ('stop', 'kill'):
                    # dispatch to self.stop() / self.kill() and send the reply
                    reply = getattr(self, cmd)()
                    ctrlsock.send(reply)
                    break
        shutil.rmtree(self.executing)
Пример #2
0
    def start(self):
        """
        Launch the worker processes, then serve control commands.

        One PULL socket connected to ``self.task_server_url`` is created per
        worker and passed to a child process running ``self.worker``; each
        socket remembers the pid of its process and is stored in
        ``self.workers``. Afterwards a REP socket bound to ``self.ctrl_url``
        answers 'getpid', 'get_num_workers' and 'get_executing' requests
        until a 'stop' or 'kill' command arrives.
        """
        setproctitle('oq-zworkerpool %s' % self.ctrl_url[6:])  # strip tcp://

        # spawn one process per worker, each with its own PULL socket
        self.workers = []
        for _ in range(self.num_workers):
            pull = z.Socket(self.task_server_url, z.zmq.PULL, 'connect')
            child = multiprocessing.Process(target=self.worker, args=(pull, ))
            child.start()
            pull.pid = child.pid
            self.workers.append(pull)

        # control loop: queries are answered, 'stop' and 'kill' end the loop
        with z.Socket(self.ctrl_url, z.zmq.REP, 'bind') as ctrlsock:
            for cmd in ctrlsock:
                if cmd == 'getpid':
                    ctrlsock.send(self.pid)
                elif cmd == 'get_num_workers':
                    ctrlsock.send(self.num_workers)
                elif cmd == 'get_executing':
                    ctrlsock.send(self.executing.value)
                elif cmd in ('stop', 'kill'):
                    # dispatch to self.stop() / self.kill() and send the reply
                    reply = getattr(self, cmd)()
                    ctrlsock.send(reply)
                    break
Пример #3
0
 def set_concurrent_tasks_default(calc):
     """
     Derive the default for concurrent_tasks from the worker pools.

     Each host listed in ``config.zworkers.host_cores`` is asked for its
     number of workers through the control port; hosts that are not ready
     are skipped with a warning. When no worker is found the local CPU
     count is used instead. Twice the resulting number is stored in both
     ``parallel.CT`` and ``OqParam.concurrent_tasks.default``.

     :param calc: not used by this function
     """
     zw = config.zworkers
     pairs = []
     if zw.host_cores:
         pairs = [hc.split() for hc in zw.host_cores.split(',')]

     total = 0
     for host, _cores in pairs:
         url = 'tcp://%s:%s' % (host, zw.ctrl_port)
         with z.Socket(url, z.zmq.REQ, 'connect') as sock:
             if general.socket_ready(url):
                 total += sock.send('get_num_workers')
             else:
                 logging.warning('%s is not running', host)

     if total == 0:
         # nothing answered: fall back to the cores of the local machine
         total = os.cpu_count()
         logging.warning(
             'Missing host_cores, no idea about how many cores '
             'are available, using %d', total)

     parallel.CT = total * 2
     OqParam.concurrent_tasks.default = total * 2
     logging.warning('Using %d zmq workers', total)
Пример #4
0
    def start(self):
        """
        Start the database worker threads and the frontend->backend proxy.

        ``self.num_workers`` REP sockets are connected to ``self.backend``,
        each served by a thread running ``self.dworker``. When ``ZMQ`` is
        true a daemon thread running ``w._streamer`` is started as well.
        The call then blocks inside ``z.zmq.proxy`` until it is interrupted;
        ``self.stop()`` is always invoked at the end.
        """
        # give a nice name to the process
        w.setproctitle('oq-dbserver')

        worker_socks = []
        for _ in range(self.num_workers):
            rep = z.Socket(self.backend, z.zmq.REP, 'connect')
            thread = threading.Thread(target=self.dworker, args=(rep, ))
            thread.start()
            worker_socks.append(rep)
        logging.warning('DB server started with %s on %s, pid %d',
                        sys.executable, self.frontend, self.pid)

        if ZMQ:
            # start task_in->task_server streamer thread
            streamer = threading.Thread(target=w._streamer, daemon=True)
            streamer.start()
            streamer_port = int(config.zworkers.ctrl_port) + 1
            logging.warning('Task streamer started on port %d', streamer_port)

        # start frontend->backend proxy for the database workers
        try:
            frontend = z.bind(self.frontend, z.zmq.ROUTER)
            backend = z.bind(self.backend, z.zmq.DEALER)
            z.zmq.proxy(frontend, backend)
        except (KeyboardInterrupt, z.zmq.ContextTerminated):
            # tell every worker loop to finish and close its socket
            for rep in worker_socks:
                rep.running = False
                rep.zsocket.close()
            logging.warning('DB server stopped')
        finally:
            self.stop()
Пример #5
0
 def worker(self, sock):
     """
     Execute the tasks received on the socket and push back the results.

     :param sock: a zeromq.Socket of kind PULL receiving (cmd, args)
     """
     setproctitle('oq-zworker')
     for cmd, args in sock:
         monitor = args[-1]  # the last argument carries the return address
         with z.Socket(monitor.backurl, z.zmq.PUSH, 'connect') as push:
             outcome = safely_call(cmd, args)
             push.send(outcome)
Пример #6
0
def _starmap(func, iterargs, host, task_in_port, receiver_ports):
    """
    Generator submitting the tasks and then yielding their results.

    Called by parallel.Starmap.submit_all; should not be used directly.
    The first value yielded is the number of tasks sent, followed by one
    received object per task.

    :param func: the callable sent together with each argument tuple
    :param iterargs: iterable of argument sequences ending with a Monitor
    :param host: host used for both the task and the receiver URL
    :param task_in_port: port where the tasks are pushed
    :param receiver_ports: port specification used to bind the receiver
    """
    task_in_url = 'tcp://%s:%s' % (host, task_in_port)
    receiver_url = 'tcp://%s:%s' % (host, receiver_ports)
    with z.Socket(receiver_url, z.zmq.PULL, 'bind') as receiver:
        logging.info('Receiver port for %s=%s', func.__name__, receiver.port)
        # replace the port part of the end point with the actual bound port
        prefix = receiver.end_point.rsplit(':', 1)[0]
        backurl = '%s:%s' % (prefix, receiver.port)
        with z.Socket(task_in_url, z.zmq.PUSH, 'connect') as sender:
            sent = 0
            for sent, args in enumerate(iterargs, 1):
                args[-1].backurl = backurl  # args[-1] is a Monitor instance
                sender.send((func, args))
        yield sent
        # receive one response for each request sent
        for _ in range(sent):
            yield receiver.zsocket.recv_pyobj()
Пример #7
0
 def inspect(self):
     """
     Ask every running worker pool which tasks it is executing.

     Hosts reported as 'not-running' by ``self.status`` are printed and
     skipped.

     :returns: a list of pairs (host, reply to 'get_executing')
     """
     report = []
     for host, _ in self.host_cores:
         state = self.status(host)[0][1]
         if state != 'not-running':
             ctrl_url = 'tcp://%s:%s' % (host, self.ctrl_port)
             with z.Socket(ctrl_url, z.zmq.REQ, 'connect') as sock:
                 report.append((host, sock.send('get_executing')))
         else:
             print('%s not running' % host)
     return report
Пример #8
0
 def kill(self):
     """
     Send a "kill" command to all worker pools that are running.

     Hosts reported as 'not-running' by ``self.status`` are printed and
     skipped.

     :returns: the string 'killed [...]' listing the hosts contacted
     """
     done = []
     for host, _ in self.host_cores:
         state = self.status(host)[0][1]
         if state != 'not-running':
             ctrl_url = 'tcp://%s:%s' % (host, self.ctrl_port)
             with z.Socket(ctrl_url, z.zmq.REQ, 'connect') as sock:
                 sock.send('kill')
                 done.append(host)
         else:
             print('%s not running' % host)
     return 'killed %s' % done
Пример #9
0
def dbcmd(action, *args):
    """
    Dispatch a command to the database server and return its answer.

    A fresh REQ socket is opened for every call. If the reply is a
    ``parallel.Result`` its ``.get()`` value is returned, otherwise the
    reply itself.

    :param action: database action to perform
    :param args: arguments
    """
    endpoint = 'tcp://%s:%s' % (config.dbserver.host, DBSERVER_PORT)
    sock = zeromq.Socket(endpoint, zeromq.zmq.REQ, 'connect')
    with sock:
        reply = sock.send((action, *args))
        if not isinstance(reply, parallel.Result):
            return reply
        # unwrap the result while the socket is still open
        return reply.get()
Пример #10
0
 def status(self):
     """
     Query every reachable worker pool for its load.

     Hosts whose control port is not ready are left out of the result.

     :returns: a list [(host, running, total), ...]
     """
     report = []
     for host, _cores in self.host_cores:
         if general.socket_ready((host, self.ctrl_port)):
             ctrl_url = 'tcp://%s:%s' % (host, self.ctrl_port)
             with z.Socket(ctrl_url, z.zmq.REQ, 'connect') as sock:
                 # the reply is a whitespace-separated string of task names
                 busy = sock.send('get_executing').split()
                 total = sock.send('get_num_workers')
                 report.append((host, len(busy), total))
     return report
Пример #11
0
def dbcmd(action, *args):
    """
    Dispatch a command to the database server and return its answer.

    The reply is a triple (result, exception type, monitor); when the
    exception type is set it is raised with the result as its argument.

    :param action: database action to perform
    :param args: arguments
    """
    endpoint = 'tcp://%s:%s' % (config.dbserver.host, DBSERVER_PORT)
    sock = zeromq.Socket(endpoint, zeromq.zmq.REQ, 'connect')
    with sock:
        value, exc_type, _monitor = sock.send((action, *args))
    if not exc_type:
        return value
    raise exc_type(value)
Пример #12
0
 def stop(self):
     """
     Send a "stop" command to all worker pools that are running.

     Hosts reported as 'not-running' by ``self.status`` are printed and
     skipped. If the instance has a ``streamer`` attribute it is
     terminated afterwards.

     :returns: the string 'stopped [...]' listing the hosts contacted
     """
     done = []
     for host, _ in self.host_cores:
         state = self.status(host)[0][1]
         if state != 'not-running':
             ctrl_url = 'tcp://%s:%s' % (host, self.ctrl_port)
             with z.Socket(ctrl_url, z.zmq.REQ, 'connect') as sock:
                 sock.send('stop')
                 done.append(host)
         else:
             print('%s not running' % host)
     if hasattr(self, 'streamer'):
         self.streamer.terminate()
     return 'stopped %s' % done
Пример #13
0
 def kill(self):
     """
     Send a "kill" command to all reachable worker pools.

     Hosts whose control port is not ready are silently skipped. Every
     process in ``self.popens`` is killed afterwards and the list is reset.

     :returns: the string 'killed [...]' listing the hosts contacted
     """
     done = []
     for host, _ in self.host_cores:
         if general.socket_ready((host, self.ctrl_port)):
             ctrl_url = 'tcp://%s:%s' % (host, self.ctrl_port)
             with z.Socket(ctrl_url, z.zmq.REQ, 'connect') as sock:
                 sock.send('kill')
                 done.append(host)
     for proc in self.popens:
         proc.kill()
     self.popens = []
     return 'killed %s' % done
Пример #14
0
 def set_concurrent_tasks_default(job_id):
     """
     Set the default for concurrent_tasks based on the available
     worker pools.

     Every host listed in ``config.zworkers.host_cores`` (a comma-separated
     list of "host cores" pairs) is asked for its number of workers through
     the control port; hosts that are not ready are skipped with a warning.
     The default is set to twice the total number of workers found.

     :param job_id: not used by this function
     """
     w = config.zworkers
     # An empty host_cores must be handled explicitly: ''.split(',') gives
     # [''] and ''.split() gives [], so unpacking (host, cores) would fail
     # with a ValueError.
     if w.host_cores:
         host_cores = [hc.split() for hc in w.host_cores.split(',')]
     else:
         host_cores = []

     num_workers = 0
     for host, _cores in host_cores:
         url = 'tcp://%s:%s' % (host, w.ctrl_port)
         with z.Socket(url, z.zmq.REQ, 'connect') as sock:
             if not general.socket_ready(url):
                 # .warning() replaces the deprecated .warn() alias
                 logs.LOG.warning('%s is not running', host)
                 continue
             num_workers += sock.send('get_num_workers')
     OqParam.concurrent_tasks.default = num_workers * 2
     logs.LOG.warning('Using %d zmq workers', num_workers)
Пример #15
0
def dbcmd(action, *args):
    """
    Dispatch a command to the database server and return its answer.

    The REQ socket is created on the first call and cached in the
    module-level ``sock`` variable for the following ones. If the reply is
    a ``parallel.Result`` its ``.get()`` value is returned, otherwise the
    reply itself.

    :param action: database action to perform
    :param args: arguments
    """
    global sock
    if sock is None:
        endpoint = 'tcp://%s:%s' % (config.dbserver.host, DBSERVER_PORT)
        # the socket will be closed when the calculation ends
        sock = zeromq.Socket(endpoint, zeromq.zmq.REQ, 'connect').__enter__()
    reply = sock.send((action, *args))
    return reply.get() if isinstance(reply, parallel.Result) else reply
Пример #16
0
 def stop(self):
     """
     Send a "stop" command to all reachable worker pools.

     Hosts whose control port is not ready are silently skipped. Every
     process in ``self.popens`` is terminated and waited for afterwards,
     and the list is reset.

     :returns: the string 'stopped [...]' listing the hosts contacted
     """
     done = []
     for host, _ in self.host_cores:
         if general.socket_ready((host, self.ctrl_port)):
             ctrl_url = 'tcp://%s:%s' % (host, self.ctrl_port)
             with z.Socket(ctrl_url, z.zmq.REQ, 'connect') as sock:
                 sock.send('stop')
                 done.append(host)
     for proc in self.popens:
         proc.terminate()
         # The output of the spawned process is not consumed, so wait()
         # must follow terminate() to let Popen release the process file
         # descriptors; otherwise zombies will arise.
         proc.wait()
     self.popens = []
     return 'stopped %s' % done
Пример #17
0
    def start(self):
        """
        Start the database worker threads and the frontend->backend proxy.

        ``self.num_workers`` REP sockets are connected to ``self.backend``,
        each served by a thread running ``self.dworker``. When ``ZMQ`` is
        true a streamer thread is started with the master host and the
        task_in/task_out ports, then ``self.master.start()`` is called and
        its message logged. The call finally blocks inside ``z.zmq.proxy``
        until it is interrupted; ``self.stop()`` is always invoked at the
        end.
        """
        # give a nice name to the process
        w.setproctitle('oq-dbserver')

        worker_socks = []
        for _ in range(self.num_workers):
            rep = z.Socket(self.backend, z.zmq.REP, 'connect')
            thread = threading.Thread(target=self.dworker, args=(rep,))
            thread.start()
            worker_socks.append(rep)
        logging.warning('DB server started with %s on %s, pid %d',
                        sys.executable, self.frontend, self.pid)

        if ZMQ:
            # start task_in->task_out streamer thread
            zw = config.zworkers
            streamer_args = (self.master_host, zw.task_in_port,
                             zw.task_out_port)
            threading.Thread(target=w._streamer, args=streamer_args).start()
            logging.warning('Task streamer started from %s -> %s',
                            zw.task_in_port, zw.task_out_port)

            # start zworkers and wait a bit for them
            logging.warning(self.master.start())
            time.sleep(1)

        # start frontend->backend proxy for the database workers
        try:
            frontend = z.bind(self.frontend, z.zmq.ROUTER)
            backend = z.bind(self.backend, z.zmq.DEALER)
            z.zmq.proxy(frontend, backend)
        except (KeyboardInterrupt, z.zmq.ZMQError):
            # tell every worker loop to finish and close its socket
            for rep in worker_socks:
                rep.running = False
                rep.zsocket.close()
            logging.warning('DB server stopped')
        finally:
            self.stop()
    def run_calc(self):
        """
        Prepare a calculation on ``self.calculator`` (reinvented from
        openquake.calculators.base).

        The method runs the pre-execution step, obtains the ruptures (from a
        parent calculation, from the sources or from a scenario), creates the
        GMF datasets when ground motion fields are requested and finally
        builds the task arguments and the monitor.

        On the normal path the results are stored on the instance as
        ``self.args``, ``self.mon`` and ``self.dstore`` and nothing is
        returned explicitly.

        :returns: an empty dict on the early-exit branches (no GMFs/hazard
            curves requested, or no rupture_model given), otherwise None
        """

        # everything runs inside the calculator monitor context
        with self.calculator._monitor:
            self.calculator._monitor.username = ''
            try:
                # Pre-execute setups
                self.calculator.pre_execute()

                #self.calculator.datastore.swmr_on()
                oq = self.calculator.oqparam
                dstore = self.calculator.datastore
                self.calculator.set_param()
                self.calculator.offset = 0

                # Source model
                # debugging output: dump the calculator attributes to stdout
                print('self.__dict__ = ')
                print(self.calculator.__dict__)
                if oq.hazard_calculation_id:  # from ruptures
                    dstore.parent = self.calculator.datastore.read(
                        oq.hazard_calculation_id)
                elif hasattr(self.calculator, 'csm'):  # from sources
                    # calls a method of this class, not of the calculator
                    self.calculator_build_events_from_sources()
                    #self.calculator.build_events_from_sources()
                    # nothing else to do when neither GMFs nor hazard curves
                    # from GMFs are requested
                    if (oq.ground_motion_fields is False
                            and oq.hazard_curves_from_gmfs is False):
                        return {}
                elif 'rupture_model' not in oq.inputs:
                    logging.warning(
                        'There is no rupture_model, the calculator will just '
                        'import data without performing any calculation')
                    fake = logictree.FullLogicTree.fake()
                    dstore['full_lt'] = fake  # needed to expose the outputs
                    dstore['weights'] = [1.]
                    return {}
                else:  # scenario
                    self.calculator._read_scenario_ruptures()
                    if (oq.ground_motion_fields is False
                            and oq.hazard_curves_from_gmfs is False):
                        return {}

                # Intensity measure models
                if oq.ground_motion_fields:
                    imts = oq.get_primary_imtls()
                    nrups = len(dstore['ruptures'])
                    base.create_gmf_data(dstore, imts, oq.get_sec_imts())
                    dstore.create_dset('gmf_data/sigma_epsilon',
                                       getters.sig_eps_dt(oq.imtls))
                    dstore.create_dset('gmf_data/time_by_rup',
                                       getters.time_dt, (nrups, ),
                                       fillvalue=None)

                # Prepare inputs for GmfGetter
                nr = len(dstore['ruptures'])
                logging.info('Reading {:_d} ruptures'.format(nr))
                rgetters = getters.get_rupture_getters(
                    dstore,
                    oq.concurrent_tasks * 1.25,
                    srcfilter=self.calculator.srcfilter)
                # one (rupture getter, calculator params) pair per getter
                args = [(rgetter, self.calculator.param)
                        for rgetter in rgetters]
                mon = performance.Monitor()
                mon.version = version
                mon.config = config
                rcvr = 'tcp://%s:%s' % (config.dbserver.listen,
                                        config.dbserver.receiver_ports)
                # NOTE(review): the socket is entered but never exited in this
                # method; only its port is used, to build mon.backurl
                skt = zeromq.Socket(rcvr, zeromq.zmq.PULL, 'bind').__enter__()
                mon.backurl = 'tcp://%s:%s' % (config.dbserver.host, skt.port)
                mon = mon.new(operation='total ' +
                              self.calculator.core_task.__func__.__name__,
                              measuremem=True)
                # NOTE(review): args[0] is a plain tuple, which has no
                # 'weight' attribute, so the default 1. is what gets used
                mon.weight = getattr(args[0], 'weight',
                                     1.)  # used in task_info
                mon.task_no = 1  # initialize the task number
                # args is a list: this appends mon as its last element
                args += (mon, )

                # keep the prepared inputs on the instance
                self.args = args
                self.mon = mon
                self.dstore = dstore

            finally:
                # printed on every exit path, including early returns and
                # exceptions
                print('FetchOpenQuake: OpenQuake Hazard Calculator defined.')