def run_calc(job_id, oqparam, log_level, log_file, exports,
             hazard_calculation_id=None, **kw):
    """
    Run a calculation.

    :param job_id:
        ID of the current job
    :param oqparam:
        :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param str log_level:
        The desired logging level. Valid choices are 'debug', 'info',
        'progress', 'warn', 'error', and 'critical'.
    :param str log_file:
        Complete path (including file name) to file where logs will be
        written. If `None`, logging will just be printed to standard output.
    :param exports:
        A comma-separated string of export types.
    :param hazard_calculation_id:
        Forwarded as is to `_do_run_calc`.
    :param kw:
        Extra keyword arguments forwarded as they are to `_do_run_calc`.
    :returns:
        The calculator instance built from `oqparam`.
    :raises:
        Whatever the calculation raised; the job is flagged as 'failed'
        before the exception is propagated.
    """
    monitor = Monitor('total runtime', measuremem=True)
    with logs.handle(job_id, log_level, log_file):  # run the job
        if USE_CELERY and os.environ.get('OQ_DISTRIBUTE') == 'celery':
            set_concurrent_tasks_default()
        calc = base.calculators(oqparam, monitor, calc_id=job_id)
        calc.from_engine = True
        # sentinel value: it is overwritten only if the calculation fails
        tb = 'None\n'
        try:
            logs.dbcmd('set_status', job_id, 'executing')
            _do_run_calc(calc, exports, hazard_calculation_id, **kw)
            expose_outputs(calc.datastore)
            records = views.performance_view(calc.datastore)
            logs.dbcmd('save_performance', job_id, records)
            calc.datastore.close()
            logs.LOG.info('Calculation %d finished correctly in %d seconds',
                          job_id, calc._monitor.duration)
            logs.dbcmd('finish', job_id, 'complete')
        except BaseException:
            # BaseException on purpose: even an interruption must leave
            # the job flagged as failed before being re-raised
            tb = traceback.format_exc()
            try:
                logs.LOG.critical(tb)
                logs.dbcmd('finish', job_id, 'failed')
            except BaseException:  # an OperationalError may always happen
                sys.stderr.write(tb)
            raise
        finally:
            # if there was an error in the calculation, this part may fail;
            # in such a situation, we simply log the cleanup error without
            # taking further action, so that the real error can propagate
            try:
                if USE_CELERY:
                    celery_cleanup(TERMINATE, parallel.Starmap.task_ids)
            except BaseException:
                # log the finalization error only if there is no real error
                if tb == 'None\n':
                    logs.LOG.error('finalizing', exc_info=True)
    return calc
def run_job(job_ini, log_level='info', log_file=None, exports='',
            username=getpass.getuser(), **kw):
    """
    Create a job from a configuration file, run it and print its outputs.

    :param str job_ini:
        Path to calculation config (INI-style) files.
    :param str log_level:
        'debug', 'info', 'warn', 'error', or 'critical'
    :param str log_file:
        Path to log file.
    :param exports:
        A comma-separated string of export types requested by the user.
    :param username:
        Name of the user running the job
    :param kw:
        Extra parameters like hazard_calculation_id and calculation_mode
    :returns:
        The ID of the job that has been created.
    """
    numeric_level = getattr(logging, log_level.upper())
    job_id = logs.init('job', numeric_level)
    with logs.handle(job_id, log_level, log_file):
        ini_path = os.path.abspath(job_ini)
        oqparam = eng.job_from_file(ini_path, job_id, username, **kw)
        # the user name is passed to the calculation together with the
        # other extra parameters
        calc_kw = dict(kw, username=username)
        eng.run_calc(job_id, oqparam, exports, **calc_kw)
        outputs = logs.dbcmd('list_outputs', job_id, False)
        for output_line in outputs:
            safeprint(output_line)
    return job_id
def main(calc_id: int, aggregate_by):
    """
    Re-run the postprocessing after an event based risk calculation

    :param calc_id:
        ID of the parent calculation, read with `util.read`
    :param aggregate_by:
        comma-separated string of tag names; each of them must be listed
        in the `aggregate_by` parameter of the parent calculation
    :raises ValueError:
        if a tag name is not in the `aggregate_by` of the parent
    """
    parent = util.read(calc_id)
    oqp = parent['oqparam']
    aggby = aggregate_by.split(',')
    for tagname in aggby:
        if tagname not in oqp.aggregate_by:
            raise ValueError('%r not in %s' % (tagname, oqp.aggregate_by))
    job_id = logs.init('job', level=logging.INFO)
    # NOTE(review): the job record references itself (job_id) as hazard
    # calculation, while oqp.hazard_calculation_id below is set to the
    # parent - confirm that this is intended
    dic = dict(
        calculation_mode='reaggregate',
        description=oqp.description + '[aggregate_by=%s]' % aggregate_by,
        user_name=getpass.getuser(), is_running=1, status='executing',
        pid=os.getpid(), hazard_calculation_id=job_id)
    logs.dbcmd('update_job', job_id, dic)
    # anything different from 'no' is turned into 'processpool'
    if os.environ.get('OQ_DISTRIBUTE') not in ('no', 'processpool'):
        os.environ['OQ_DISTRIBUTE'] = 'processpool'
    with logs.handle(job_id, logging.INFO):
        oqp.hazard_calculation_id = parent.calc_id
        parallel.Starmap.init()
        prc = PostRiskCalculator(oqp, job_id)
        try:
            prc.run(aggregate_by=aggby)
            engine.expose_outputs(prc.datastore)
            logs.dbcmd('finish', job_id, 'complete')
        except Exception:
            # flag the job first, then record the traceback: the error is
            # deliberately not re-raised, but it must not be lost
            logs.dbcmd('finish', job_id, 'failed')
            logging.critical('The reaggregation of calculation %s failed',
                             calc_id, exc_info=True)
        finally:
            parallel.Starmap.shutdown()
def run_calc(job_id, oqparam, exports, log_level='info', log_file=None, **kw):
    """
    Run a calculation.

    :param job_id:
        ID of the current job
    :param oqparam:
        :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param exports:
        A comma-separated string of export types.
    :param str log_level:
        Name of a logging level; it is upper-cased and looked up in the
        `logging` module.
    :param str log_file:
        Path to the log file, if any; after the run a warning is logged
        if that file is empty.
    :param kw:
        Extra keyword arguments forwarded to `calc.run`.
    :returns:
        The calculator instance.
    :raises:
        Whatever the calculation raised; the job is flagged as 'aborted'
        (for `MasterKilled`) or 'failed' before the exception propagates.
    """
    register_signals()
    setproctitle('oq-job-%d' % job_id)
    logs.init(job_id, getattr(logging, log_level.upper()))
    with logs.handle(job_id, log_level, log_file):
        calc = base.calculators(oqparam, calc_id=job_id)
        logging.info('%s running %s [--hc=%s]',
                     getpass.getuser(),
                     calc.oqparam.inputs['job_ini'],
                     calc.oqparam.hazard_calculation_id)
        logging.info('Using engine version %s', __version__)
        obsolete_msg = check_obsolete_version(oqparam.calculation_mode)
        if obsolete_msg:
            logging.warning(obsolete_msg)
        calc.from_engine = True
        try:
            if OQ_DISTRIBUTE.endswith('pool'):
                logging.warning('Using %d cores on %s',
                                parallel.CT // 2, platform.node())
            set_concurrent_tasks_default(calc)
            t0 = time.time()
            calc.run(exports=exports, **kw)
            logging.info('Exposing the outputs to the database')
            expose_outputs(calc.datastore)
            path = calc.datastore.filename
            size = general.humansize(os.path.getsize(path))
            logging.info('Stored %s on %s in %d seconds',
                         size, path, time.time() - t0)
            logs.dbcmd('finish', job_id, 'complete')
            calc.datastore.close()
            for line in logs.dbcmd('list_outputs', job_id, False):
                general.safeprint(line)
        except BaseException as exc:
            # BaseException on purpose: the job status must be updated
            # whatever interrupted the calculation
            status = 'aborted' if isinstance(exc, MasterKilled) else 'failed'
            tb = traceback.format_exc()
            try:
                logging.critical(tb)
                logs.dbcmd('finish', job_id, status)
            except BaseException:  # an OperationalError may always happen
                sys.stderr.write(tb)
            raise
        finally:
            parallel.Starmap.shutdown()
    # sanity check to make sure that the logging on file is working
    if (log_file and log_file != os.devnull and
            os.path.getsize(log_file) == 0):
        logging.warning('The log file %s is empty!?', log_file)
    return calc
def recompute_losses(calc_id, aggregate_by):
    """
    Run the post-risk calculator again on top of an existing calculation.

    :param calc_id:
        ID of the calculation to read and to use as hazard calculation
    :param aggregate_by:
        comma-separated string of tag names; each of them must appear in
        the `aggregate_by` parameter of the calculation
    :raises ValueError:
        for the first tag name which is not in that parameter
    """
    parent = util.read(calc_id)
    oqp = parent['oqparam']
    aggby = aggregate_by.split(',')

    # stop at the first tag the original calculation did not aggregate by
    unknown = next(
        (tag for tag in aggby if tag not in oqp.aggregate_by), None)
    if unknown is not None:
        raise ValueError('%r not in %s' % (unknown, oqp.aggregate_by))

    job_id = logs.init('job', level=logging.INFO)

    # every distribution mode except 'no' is replaced with 'processpool'
    distribute = os.environ.get('OQ_DISTRIBUTE')
    if distribute != 'no' and distribute != 'processpool':
        os.environ['OQ_DISTRIBUTE'] = 'processpool'

    with logs.handle(job_id, logging.INFO):
        oqp.hazard_calculation_id = calc_id
        parallel.Starmap.init()
        calculator = PostRiskCalculator(oqp, job_id)
        try:
            calculator.run(aggregate_by=aggby)
        finally:
            parallel.Starmap.shutdown()
def run_jobs(job_inis, log_level='info', log_file=None, exports='',
             username=getpass.getuser(), **kw):
    """
    Create the jobs for the given inputs, wait for a free slot and run them.

    :param str job_inis:
        A list of paths to .ini files, or a list of job dictionaries
    :param str log_level:
        'debug', 'info', 'warn', 'error', or 'critical'
    :param str log_file:
        Path to log file.
    :param exports:
        A comma-separated string of export types requested by the user.
    :param username:
        Name of the user running the job
    :param kw:
        Extra parameters like hazard_calculation_id and calculation_mode
    :returns:
        A list of pairs (job_id, oqparam), one per job.
    """
    dist = parallel.oq_distribute()
    multi = kw.pop('multi', None)
    numeric_level = getattr(logging, log_level.upper())
    jobs = create_jobs(job_inis, numeric_level, kw)
    # NB: removed from kw only after create_jobs has received it
    hc_id = kw.pop('hazard_calculation_id', None)

    jobparams = []
    for job in jobs:
        job_id = job['_job_id']
        with logs.handle(job_id, log_level, log_file):
            oqparam = readinput.get_oqparam(job, hc_id=hc_id, **kw)
        job_record = {
            'calculation_mode': oqparam.calculation_mode,
            'description': oqparam.description,
            'user_name': username,
            'hazard_calculation_id': hc_id,
        }
        logs.dbcmd('update_job', job_id, job_record)
        is_first = not jobparams
        if (is_first and not multi and hc_id is None
                and 'sensitivity_analysis' not in job):
            # the jobs after the first one get its ID as hc_id
            hc_id = job_id
        jobparams.append((job_id, oqparam))
    jobarray = len(jobparams) > 1 and multi

    try:
        # wait for an empty slot or a CTRL-C
        poll_queue(job_id, poll_time=15)
    except BaseException:
        # the job aborted even before starting
        for aborted_id, _ in jobparams:
            logs.dbcmd('finish', aborted_id, 'aborted')
        return jobparams

    # the queue is free: flag every job as executing
    for jid, _ in jobparams:
        status = {'status': 'executing', 'pid': _PID}
        if jobarray:
            status['hazard_calculation_id'] = jobparams[0][0]
        logs.dbcmd('update_job', jid, status)

    try:
        if dist == 'zmq' and config.zworkers['host_cores']:
            logging.info('Asking the DbServer to start the workers')
            logs.dbcmd('zmq_start')  # start the zworkers
            logs.dbcmd('zmq_wait')  # wait for them to go up
        calc_args = [(jid, oqp, exports, log_level, log_file)
                     for jid, oqp in jobparams]
        if not jobarray:
            # one calculation after the other
            for args in calc_args:
                run_calc(*args)
        else:
            with general.start_many(run_calc, calc_args):
                pass
    finally:
        if dist == 'zmq' and config.zworkers['host_cores']:
            logging.info('Stopping the zworkers')
            logs.dbcmd('zmq_stop')
        elif dist.startswith('celery'):
            celery_cleanup(config.distribution.terminate_workers_on_revoke)
    return jobparams
def run_jobs(job_inis, log_level='info', log_file=None, exports='',
             username=getpass.getuser(), **kw):
    """
    Run jobs using the specified config file and other options.

    :param str job_inis:
        A list of paths to .ini files, or a list of job dictionaries
    :param str log_level:
        'debug', 'info', 'warn', 'error', or 'critical'
    :param str log_file:
        Path to log file.
    :param exports:
        A comma-separated string of export types requested by the user.
    :param username:
        Name of the user running the job
    :param kw:
        Extra parameters like hazard_calculation_id and calculation_mode;
        the key 'multi', if present, is removed and used to decide if the
        jobs are run as a job array
    :returns:
        A list of pairs (job_id, oqparam), one per job.
    """
    jobparams = []
    multi = kw.pop('multi', None)
    loglvl = getattr(logging, log_level.upper())
    jobs = create_jobs(job_inis, loglvl, kw)  # initialize the logs
    # a missing or falsy hazard_calculation_id means "no parent"
    if kw.get('hazard_calculation_id'):
        hc_id = int(kw['hazard_calculation_id'])
    else:
        hc_id = None
    for job in jobs:
        job_id = job['_job_id']
        job['hazard_calculation_id'] = hc_id
        with logs.handle(job_id, log_level, log_file):
            dic = dict(calculation_mode=job['calculation_mode'],
                       description=job['description'],
                       user_name=username, is_running=1)
            if hc_id:
                dic['hazard_calculation_id'] = hc_id
            logs.dbcmd('update_job', job_id, dic)
            # the ID of the first job becomes the hc_id of the following
            # jobs, unless 'multi' was given, a hazard_calculation_id was
            # passed or the job contains a 'sensitivity_analysis' key
            if (not jobparams and not multi and
                    'hazard_calculation_id' not in kw and
                    'sensitivity_analysis' not in job):
                hc_id = job_id
            try:
                oqparam = readinput.get_oqparam(job)
            except BaseException:
                # log the traceback and flag the job before re-raising
                tb = traceback.format_exc()
                logging.critical(tb)
                logs.dbcmd('finish', job_id, 'failed')
                raise
        jobparams.append((job_id, oqparam))
    # truthy only with more than one job and 'multi' set
    jobarray = len(jobparams) > 1 and multi
    try:
        # NB: job_id is the ID of the last job created in the loop above
        poll_queue(job_id, poll_time=15)
        # wait for an empty slot or a CTRL-C
    except BaseException:
        # the job aborted even before starting
        for job_id, oqparam in jobparams:
            logs.dbcmd('finish', job_id, 'aborted')
        return jobparams
    else:
        for job_id, oqparam in jobparams:
            dic = {'status': 'executing', 'pid': _PID}
            if jobarray:
                # in a job array every job refers to the first one
                dic['hazard_calculation_id'] = jobparams[0][0]
            logs.dbcmd('update_job', job_id, dic)
    try:
        # ask for the workers only if host_cores is configured and
        # workers_status() returns an empty list
        if config.zworkers['host_cores'] and parallel.workers_status() == []:
            logging.info('Asking the DbServer to start the workers')
            logs.dbcmd('workers_start')  # start the workers
        allargs = [(job_id, oqparam, exports, log_level, log_file)
                   for job_id, oqparam in jobparams]
        if jobarray:
            # presumably start_many runs the calculations together and
            # waits for them on exit - TODO confirm
            with general.start_many(run_calc, allargs):
                pass
        else:
            # run the calculations one after the other
            for args in allargs:
                run_calc(*args)
    finally:
        # NB: the workers are stopped whenever host_cores is configured,
        # regardless of whether they were started by this call
        if config.zworkers['host_cores']:
            logging.info('Stopping the workers')
            parallel.workers_stop()
    return jobparams
def run_calc(job_id, oqparam, log_level, log_file, exports,
             hazard_calculation_id=None, **kw):
    """
    Run a calculation.

    :param job_id:
        ID of the current job
    :param oqparam:
        :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param str log_level:
        The desired logging level. Valid choices are 'debug', 'info',
        'progress', 'warn', 'error', and 'critical'.
    :param str log_file:
        Complete path (including file name) to file where logs will be
        written. If `None`, logging will just be printed to standard output.
    :param exports:
        A comma-separated string of export types.
    :param hazard_calculation_id:
        Forwarded to `calc.run`.
    :param kw:
        Extra keyword arguments forwarded to `calc.run`.
    :returns:
        The calculator instance.
    :raises:
        Whatever the calculation raised; the job is flagged as 'failed'
        before the exception is propagated.
    """
    setproctitle('oq-job-%d' % job_id)
    with logs.handle(job_id, log_level, log_file):  # run the job
        calc = base.calculators(oqparam, calc_id=job_id)
        calc.set_log_format()  # set the log format first of all
        logging.info('Running %s [--hc=%s]',
                     calc.oqparam.inputs['job_ini'],
                     calc.oqparam.hazard_calculation_id)
        logging.info('Using engine version %s', __version__)
        msg = check_obsolete_version(oqparam.calculation_mode)
        if msg:
            # `warn` is a deprecated alias of `warning`
            logs.LOG.warning(msg)
        if OQ_DISTRIBUTE.startswith(('celery', 'zmq')):
            set_concurrent_tasks_default(job_id)
        calc.from_engine = True
        input_zip = oqparam.inputs.get('input_zip')
        # sentinel value: it is overwritten only if the calculation fails
        tb = 'None\n'
        try:
            if input_zip:  # the input was zipped from the beginning
                # the context manager closes the archive after reading
                with open(input_zip, 'rb') as archive:
                    data = archive.read()
            else:  # zip the input
                logs.LOG.info('zipping the input files')
                bio = io.BytesIO()
                # NB: `zip` is called with a path and a buffer, so it is
                # presumably a project helper shadowing the builtin
                zip(oqparam.inputs['job_ini'], bio, (), oqparam,
                    logging.debug)
                data = bio.getvalue()
            # store the zipped input inside the datastore
            calc.datastore['input_zip'] = numpy.array(data)
            calc.datastore.set_attrs('input_zip', nbytes=len(data))
            logs.dbcmd('update_job', job_id, {'status': 'executing',
                                              'pid': _PID})
            t0 = time.time()
            calc.run(exports=exports,
                     hazard_calculation_id=hazard_calculation_id,
                     close=False, **kw)
            logs.LOG.info('Exposing the outputs to the database')
            expose_outputs(calc.datastore)
            duration = time.time() - t0
            calc._monitor.flush()
            records = views.performance_view(calc.datastore)
            logs.dbcmd('save_performance', job_id, records)
            calc.datastore.close()
            logs.LOG.info('Calculation %d finished correctly in %d seconds',
                          job_id, duration)
            logs.dbcmd('finish', job_id, 'complete')
        except BaseException:
            tb = traceback.format_exc()
            try:
                logs.LOG.critical(tb)
                logs.dbcmd('finish', job_id, 'failed')
            except BaseException:  # an OperationalError may always happen
                sys.stderr.write(tb)
            raise
        finally:
            # if there was an error in the calculation, this part may fail;
            # in such a situation, we simply log the cleanup error without
            # taking further action, so that the real error can propagate
            try:
                if OQ_DISTRIBUTE.startswith('celery'):
                    celery_cleanup(TERMINATE, parallel.running_tasks)
            except BaseException:
                # log the finalization error only if there is no real error
                if tb == 'None\n':
                    logs.LOG.error('finalizing', exc_info=True)
    return calc
def run_jobs(job_inis, log_level='info', log_file=None, exports='',
             username=getpass.getuser(), **kw):
    """
    Create one job per configuration file, wait for a free slot and run
    all of them.

    :param str job_inis:
        A list of paths to .ini files.
    :param str log_level:
        'debug', 'info', 'warn', 'error', or 'critical'
    :param str log_file:
        Path to log file.
    :param exports:
        A comma-separated string of export types requested by the user.
    :param username:
        Name of the user running the job
    :param kw:
        Extra parameters like hazard_calculation_id and calculation_mode
    :returns:
        A list of pairs (job_id, oqparam), one per configuration file.
    """
    dist = parallel.oq_distribute()
    jobparams = []
    for job_ini in job_inis:
        # NB: the logs must be initialized BEFORE everything
        numeric_level = getattr(logging, log_level.upper())
        job_id = logs.init('job', numeric_level)
        with logs.handle(job_id, log_level, log_file):
            oqparam = eng.job_from_file(
                os.path.abspath(job_ini), job_id, username, **kw)
        is_first = not jobparams
        if (is_first and 'csm_cache' not in kw
                and 'hazard_calculation_id' not in kw):
            # the jobs after the first one receive its ID via kw
            kw['hazard_calculation_id'] = job_id
        jobparams.append((job_id, oqparam))
    jobarray = len(jobparams) > 1 and 'csm_cache' in kw

    try:
        # wait for an empty slot or a CTRL-C
        eng.poll_queue(job_id, poll_time=15)
    except BaseException:
        # the job aborted even before starting
        for aborted_id, _ in jobparams:
            logs.dbcmd('finish', aborted_id, 'aborted')
        return jobparams

    # the queue is free: flag every job as executing
    for jid, _ in jobparams:
        status = {'status': 'executing', 'pid': eng._PID}
        if jobarray:
            status['hazard_calculation_id'] = jobparams[0][0]
        logs.dbcmd('update_job', jid, status)

    try:
        if dist == 'zmq' and config.zworkers['host_cores']:
            logging.info('Asking the DbServer to start the workers')
            logs.dbcmd('zmq_start')  # start the zworkers
            logs.dbcmd('zmq_wait')  # wait for them to go up
        calc_args = [(jid, oqp, exports, log_level, log_file)
                     for jid, oqp in jobparams]
        if not jobarray:
            # one calculation after the other
            for args in calc_args:
                eng.run_calc(*args)
        else:
            with start_many(eng.run_calc, calc_args):
                pass
    finally:
        if dist == 'zmq' and config.zworkers['host_cores']:
            logging.info('Stopping the zworkers')
            logs.dbcmd('zmq_stop')
        elif dist.startswith('celery'):
            eng.celery_cleanup(config.distribution.terminate_workers_on_revoke)
    return jobparams