def __init__(self, db, address, num_workers=5):
    self.db = db
    self.master_host = address[0]
    # frontend: TCP endpoint built from the (host, port) address;
    # backend: in-process endpoint for the db worker threads
    self.frontend = 'tcp://%s:%s' % address
    self.backend = 'inproc://dbworkers'
    self.num_workers = num_workers
    self.pid = os.getpid()
    self.master = w.WorkerMaster(config.dbserver.host, **config.zworkers)
def workers(cmd):
    """
    start/stop/restart the workers, or return their status
    """
    if config.dbserver.multi_user and getpass.getuser() != 'openquake':
        sys.exit('oq workers only works in single user mode')
    master = workerpool.WorkerMaster(**config.zworkers)
    print(getattr(master, cmd)())
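# Hedged sketch (not part of the engine): how the getattr-based dispatch
# above resolves the `cmd` string ('start', 'stop', 'restart', 'status')
# to a WorkerMaster method and calls it with no arguments. FakeMaster is
# a hypothetical stand-in for workerpool.WorkerMaster.
class FakeMaster:
    def status(self):
        return [('127.0.0.1', 'running')]

def dispatch(master, cmd):
    return getattr(master, cmd)()

print(dispatch(FakeMaster(), 'status'))  # [('127.0.0.1', 'running')]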
def __init__(self, db, address, num_workers=5):
    self.db = db
    # frontend: TCP endpoint built from the (host, port) address;
    # backend: in-process endpoint for the db worker threads
    self.frontend = 'tcp://%s:%s' % address
    self.backend = 'inproc://dbworkers'
    self.num_workers = num_workers
    self.pid = os.getpid()
    # instantiate the zmq WorkerMaster only when ZMQ support is available
    if ZMQ:
        self.zmaster = w.WorkerMaster(**config.zworkers)
    else:
        self.zmaster = None
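# Hedged sketch: the `address` argument above is assumed to be a
# (host, port) pair, so the '%s:%s' formatting yields the TCP endpoint
# of the frontend socket; the port 1907 is only an illustration.
address = ('127.0.0.1', 1907)
print('tcp://%s:%s' % address)  # tcp://127.0.0.1:1907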
def main(cmd):
    """
    start/stop/restart the workers, or return their status
    """
    if (cmd not in ro_commands and config.dbserver.multi_user and
            getpass.getuser() not in ('openquake', 'michele')):
        sys.exit('oq workers only works in single user mode')
    if oqdist == 'zmq':
        zmaster = w.WorkerMaster(**config.zworkers)
        pprint(getattr(zmaster, cmd)())
    else:
        print('Nothing to do: oq_distribute=%s' % oqdist)
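# Illustration of the membership test fixed above: checking against the
# string 'openquake michele' is a substring test, so a short unrelated
# user name would slip through, while checking against a tuple compares
# whole names.
user = 'que'
print(user not in 'openquake michele')       # False: 'que' is a substring
print(user not in ('openquake', 'michele'))  # True: not an allowed user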
def run_calc(job_id, oqparam, exports, hazard_calculation_id=None, **kw):
    """
    Run a calculation.

    :param job_id:
        ID of the current job
    :param oqparam:
        :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param exports:
        A comma-separated string of export types.
    """
    register_signals()
    setproctitle('oq-job-%d' % job_id)
    calc = base.calculators(oqparam, calc_id=job_id)
    logging.info('%s running %s [--hc=%s]',
                 getpass.getuser(),
                 calc.oqparam.inputs['job_ini'],
                 calc.oqparam.hazard_calculation_id)
    logging.info('Using engine version %s', __version__)
    msg = check_obsolete_version(oqparam.calculation_mode)
    if msg:
        logs.LOG.warn(msg)
    calc.from_engine = True
    tb = 'None\n'
    master = None  # set below only when OQ_DISTRIBUTE == 'zmq'
    try:
        if not oqparam.hazard_calculation_id:
            if 'input_zip' in oqparam.inputs:  # starting from an archive
                with open(oqparam.inputs['input_zip'], 'rb') as arch:
                    data = numpy.array(arch.read())
            else:
                logs.LOG.info('Zipping the input files')
                bio = io.BytesIO()
                oqzip.zip_job(oqparam.inputs['job_ini'], bio, (), oqparam,
                              logging.debug)
                data = numpy.array(bio.getvalue())
                del bio
            calc.datastore['input/zip'] = data
            calc.datastore.set_attrs('input/zip', nbytes=data.nbytes)
            del data  # save memory
        poll_queue(job_id, _PID, poll_time=15)
        if OQ_DISTRIBUTE == 'zmq':  # start zworkers
            master = w.WorkerMaster(config.dbserver.listen, **config.zworkers)
            logs.dbcmd('start_zworkers', master)
            logging.info('WorkerPool %s', master.wait_pools(seconds=30))
        if OQ_DISTRIBUTE.startswith(('celery', 'zmq')):
            set_concurrent_tasks_default(job_id)
        t0 = time.time()
        calc.run(exports=exports,
                 hazard_calculation_id=hazard_calculation_id,
                 close=False, **kw)
        logs.LOG.info('Exposing the outputs to the database')
        expose_outputs(calc.datastore)
        duration = time.time() - t0
        records = views.performance_view(calc.datastore, add_calc_id=False)
        logs.dbcmd('save_performance', job_id, records)
        calc.datastore.close()
        logs.LOG.info('Calculation %d finished correctly in %d seconds',
                      job_id, duration)
        logs.dbcmd('finish', job_id, 'complete')
    except BaseException as exc:
        if isinstance(exc, MasterKilled):
            msg = 'aborted'
        else:
            msg = 'failed'
        tb = traceback.format_exc()
        try:
            logs.LOG.critical(tb)
            logs.dbcmd('finish', job_id, msg)
        except BaseException:  # an OperationalError may always happen
            sys.stderr.write(tb)
        raise
    finally:
        # if there was an error in the calculation, this part may fail;
        # in such a situation, we simply log the cleanup error without
        # taking further action, so that the real error can propagate
        if OQ_DISTRIBUTE == 'zmq' and master is not None:  # stop zworkers
            logs.dbcmd('stop_zworkers', master)
        try:
            if OQ_DISTRIBUTE.startswith('celery'):
                celery_cleanup(TERMINATE)
        except BaseException:
            # log the finalization error only if there is no real error
            if tb == 'None\n':
                logs.LOG.error('finalizing', exc_info=True)
    return calc
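# Hedged sketch (standalone, names are illustrative): the error-handling
# pattern used by run_calc, where tb == 'None\n' is the "no primary error"
# sentinel and cleanup failures are only reported when they would not mask
# a real calculation error.
import traceback

def run_with_cleanup(work, cleanup):
    tb = 'None\n'
    try:
        return work()
    except BaseException:
        tb = traceback.format_exc()
        raise
    finally:
        try:
            cleanup()
        except BaseException:
            if tb == 'None\n':  # only report cleanup errors on success
                traceback.print_exc()

print(run_with_cleanup(lambda: 42, lambda: None))  # 42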