def test_case_1(self):
    self.assert_curves_ok(
        ['hazard_curve-PGA.csv', 'hazard_curve-SA(0.1).csv'],
        case_1.__file__)
    if parallel.oq_distribute() != 'no':
        info = text_table(view('job_info', self.calc.datastore))
        self.assertIn('task', info)
        self.assertIn('sent', info)
        self.assertIn('received', info)
        slow = view('task:classical:-1', self.calc.datastore)
        self.assertIn('taskno', slow)
        self.assertIn('duration', slow)
        self.assertIn('sources', slow)

    # there is a single source
    self.assertEqual(len(self.calc.datastore['source_info']), 1)

    # check npz export
    export(('hcurves', 'npz'), self.calc.datastore)

    # check extraction
    sitecol = extract(self.calc.datastore, 'sitecol')
    self.assertEqual(len(sitecol.array), 1)

    # check minimum_magnitude discards the source
    with self.assertRaises(RuntimeError) as ctx:
        self.run_calc(case_1.__file__, 'job.ini', minimum_magnitude='4.5')
    self.assertEqual(str(ctx.exception), 'All sources were discarded!?')
def test_case_1(self):
    self.assert_curves_ok(
        ['hazard_curve-PGA.csv', 'hazard_curve-SA(0.1).csv'],
        case_1.__file__)
    if parallel.oq_distribute() != 'no':
        info = view('job_info', self.calc.datastore)
        self.assertIn('task', info)
        self.assertIn('sent', info)
        self.assertIn('received', info)

    # there is a single source
    self.assertEqual(len(self.calc.datastore['source_info']), 1)

    # check npz export
    export(('hcurves', 'npz'), self.calc.datastore)

    # check extraction
    sitecol = extract(self.calc.datastore, 'sitecol')
    self.assertEqual(repr(sitecol), '<SiteCollection with 1/1 sites>')

    # check minimum_magnitude discards the source
    with self.assertRaises(RuntimeError) as ctx:
        self.run_calc(case_1.__file__, 'job.ini', minimum_magnitude='4.5')
    self.assertEqual(str(ctx.exception), 'All sources were filtered away!')
def test_case_1(self):
    self.assert_curves_ok(
        ['hazard_curve-PGA.csv', 'hazard_curve-SA(0.1).csv'],
        case_1.__file__)
    if parallel.oq_distribute() != 'no':
        info = view('job_info', self.calc.datastore)
        self.assertIn('task', info)
        self.assertIn('sent', info)
        self.assertIn('received', info)

    # there is a single source
    self.assertEqual(len(self.calc.datastore['source_info']), 1)

    # check npz export
    export(('hcurves', 'npz'), self.calc.datastore)

    # check extraction
    sitecol = extract(self.calc.datastore, 'sitecol')
    self.assertEqual(len(sitecol.array), 1)

    # check minimum_magnitude discards the source
    with self.assertRaises(RuntimeError) as ctx:
        self.run_calc(case_1.__file__, 'job.ini', minimum_magnitude='4.5')
    self.assertEqual(str(ctx.exception), 'All sources were filtered away!')
def test_case_1(self):
    self.assert_curves_ok(
        ['hazard_curve-smltp_b1-gsimltp_b1.csv'], case_1.__file__)
    if parallel.oq_distribute() != 'no':
        # make sure we saved the data transfer information in job_info
        keys = set(self.calc.datastore['job_info'].__dict__)
        self.assertIn('classical_max_received_per_task', keys)
        self.assertIn('classical_tot_received', keys)
        self.assertIn('classical_sent', keys)

    # there is a single source
    self.assertEqual(len(self.calc.datastore['source_info']), 1)
def test_case_1(self):
    self.assert_curves_ok(
        ['hazard_curve-smltp_b1-gsimltp_b1.csv'], case_1.__file__)
    if parallel.oq_distribute() != 'no':
        # make sure we saved the data transfer information in job_info
        keys = {decode(key) for key in dict(
            self.calc.datastore['job_info'])}
        self.assertIn('classical.received', keys)
        self.assertIn('classical.sent', keys)

    # there is a single source
    self.assertEqual(len(self.calc.datastore['source_info']), 1)

    # check npz export
    export(('hcurves', 'npz'), self.calc.datastore)
def test_case_1(self):
    self.assert_curves_ok(
        ['hazard_curve-PGA.csv', 'hazard_curve-SA(0.1).csv'],
        case_1.__file__)
    if parallel.oq_distribute() != 'no':
        info = view('job_info', self.calc.datastore)
        self.assertIn('task', info)
        self.assertIn('sent', info)
        self.assertIn('received', info)

    # there is a single source
    self.assertEqual(len(self.calc.datastore['source_info']), 1)

    # check npz export
    export(('hcurves', 'npz'), self.calc.datastore)

    # check extraction
    sitecol = extract(self.calc.datastore, 'sitecol')
    self.assertEqual(repr(sitecol), '<SiteCollection with 1/1 sites>')
def test_case_1(self):
    self.assert_curves_ok(
        ['hazard_curve-PGA.csv', 'hazard_curve-SA(0.1).csv'],
        case_1.__file__)
    if parallel.oq_distribute() != 'no':
        # make sure we saved the data transfer information in job_info
        keys = {decode(key) for key in dict(
            self.calc.datastore['job_info'])}
        self.assertIn('classical.received', keys)
        self.assertIn('classical.sent', keys)

    # there is a single source
    self.assertEqual(len(self.calc.datastore['source_info']), 1)

    # check npz export
    export(('hcurves', 'npz'), self.calc.datastore)

    # check extraction
    sitecol = extract(self.calc.datastore, 'sitecol')
    self.assertEqual(repr(sitecol), '<SiteCollection with 1/1 sites>')
import os

try:
    from setproctitle import setproctitle
except ImportError:
    def setproctitle(title):
        "Do nothing"

from urllib.request import urlopen, Request
from openquake.baselib.python3compat import decode
from openquake.baselib import (
    parallel, general, config, __version__, zeromq as z)
from openquake.commonlib.oqvalidation import OqParam
from openquake.commonlib import readinput, oqzip
from openquake.calculators import base, views, export
from openquake.commonlib import logs

OQ_API = 'https://api.openquake.org'
TERMINATE = config.distribution.terminate_workers_on_revoke
OQ_DISTRIBUTE = parallel.oq_distribute()
MB = 1024 ** 2
_PID = os.getpid()  # the PID
_PPID = os.getppid()  # the controlling terminal PID

GET_JOBS = '''--- executing or submitted
SELECT * FROM job WHERE status IN ('executing', 'submitted')
AND is_running=1 AND pid > 0 ORDER BY id'''

if OQ_DISTRIBUTE == 'zmq':

    def set_concurrent_tasks_default(job_id):
        """
        Set the default for concurrent_tasks based on the available
        worker pools.
        """
def run_jobs(job_inis, log_level='info', log_file=None, exports='',
             username=getpass.getuser(), **kw):
    """
    Run jobs using the specified config file and other options.

    :param str job_inis:
        A list of paths to .ini files, or a list of job dictionaries
    :param str log_level:
        'debug', 'info', 'warn', 'error', or 'critical'
    :param str log_file:
        Path to log file.
    :param exports:
        A comma-separated string of export types requested by the user.
    :param username:
        Name of the user running the job
    :param kw:
        Extra parameters like hazard_calculation_id and calculation_mode
    """
    dist = parallel.oq_distribute()
    jobparams = []
    multi = kw.pop('multi', None)
    loglvl = getattr(logging, log_level.upper())
    jobs = create_jobs(job_inis, loglvl, kw)
    hc_id = kw.pop('hazard_calculation_id', None)
    for job in jobs:
        job_id = job['_job_id']
        with logs.handle(job_id, log_level, log_file):
            oqparam = readinput.get_oqparam(job, hc_id=hc_id, **kw)
        logs.dbcmd(
            'update_job', job_id,
            dict(calculation_mode=oqparam.calculation_mode,
                 description=oqparam.description,
                 user_name=username,
                 hazard_calculation_id=hc_id))
        if (not jobparams and not multi and hc_id is None
                and 'sensitivity_analysis' not in job):
            hc_id = job_id
        jobparams.append((job_id, oqparam))
    jobarray = len(jobparams) > 1 and multi
    try:
        poll_queue(job_id, poll_time=15)  # wait for an empty slot or a CTRL-C
    except BaseException:
        # the job aborted even before starting
        for job_id, oqparam in jobparams:
            logs.dbcmd('finish', job_id, 'aborted')
        return jobparams
    else:
        for job_id, oqparam in jobparams:
            dic = {'status': 'executing', 'pid': _PID}
            if jobarray:
                dic['hazard_calculation_id'] = jobparams[0][0]
            logs.dbcmd('update_job', job_id, dic)
    try:
        if dist == 'zmq' and config.zworkers['host_cores']:
            logging.info('Asking the DbServer to start the workers')
            logs.dbcmd('zmq_start')  # start the zworkers
            logs.dbcmd('zmq_wait')  # wait for them to go up
        allargs = [(job_id, oqparam, exports, log_level, log_file)
                   for job_id, oqparam in jobparams]
        if jobarray:
            with general.start_many(run_calc, allargs):
                pass
        else:
            for args in allargs:
                run_calc(*args)
    finally:
        if dist == 'zmq' and config.zworkers['host_cores']:
            logging.info('Stopping the zworkers')
            logs.dbcmd('zmq_stop')
        elif dist.startswith('celery'):
            celery_cleanup(config.distribution.terminate_workers_on_revoke)
    return jobparams
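# A minimal usage sketch, not part of the original sources: it assumes the
# function above is importable as openquake.engine.engine.run_jobs and that
# 'job.ini' points to a valid calculation file. The parameters follow the
# docstring above; run_jobs returns the (job_id, oqparam) pairs it processed.
from openquake.engine.engine import run_jobs

jobparams = run_jobs(['job.ini'], log_level='info', exports='csv')
for job_id, oqparam in jobparams:
    print('finished job %d (%s)' % (job_id, oqparam.calculation_mode))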
import os

from openquake.baselib.performance import Monitor
from openquake.baselib.python3compat import urlopen, Request, decode
from openquake.baselib import (parallel, general, config, datastore,
                               __version__, zeromq as z)
from openquake.commonlib.oqvalidation import OqParam
from openquake.commonlib import readinput
from openquake.calculators import base, views, export
from openquake.commonlib import logs

OQ_API = 'https://api.openquake.org'
TERMINATE = config.distribution.terminate_workers_on_revoke
USE_CELERY = os.environ.get('OQ_DISTRIBUTE') == 'celery'

if parallel.oq_distribute() == 'zmq':

    def set_concurrent_tasks_default():
        """
        Set the default for concurrent_tasks based on the available
        worker pools.
        """
        num_workers = 0
        w = config.zworkers
        for host, _cores in [hc.split() for hc in w.host_cores.split(',')]:
            url = 'tcp://%s:%s' % (host, w.ctrl_port)
            with z.Socket(url, z.zmq.REQ, 'connect') as sock:
                if not general.socket_ready(url):
                    logs.LOG.warn('%s is not running', host)
                    continue
                num_workers += sock.send('get_num_workers')
def run_jobs(job_inis, log_level='info', log_file=None, exports='',
             username=getpass.getuser(), **kw):
    """
    Run jobs using the specified config file and other options.

    :param str job_inis:
        A list of paths to .ini files.
    :param str log_level:
        'debug', 'info', 'warn', 'error', or 'critical'
    :param str log_file:
        Path to log file.
    :param exports:
        A comma-separated string of export types requested by the user.
    :param username:
        Name of the user running the job
    :param kw:
        Extra parameters like hazard_calculation_id and calculation_mode
    """
    dist = parallel.oq_distribute()
    jobparams = []
    for job_ini in job_inis:
        # NB: the logs must be initialized BEFORE everything
        job_id = logs.init('job', getattr(logging, log_level.upper()))
        with logs.handle(job_id, log_level, log_file):
            oqparam = eng.job_from_file(os.path.abspath(job_ini), job_id,
                                        username, **kw)
        if (not jobparams and 'csm_cache' not in kw
                and 'hazard_calculation_id' not in kw):
            kw['hazard_calculation_id'] = job_id
        jobparams.append((job_id, oqparam))
    jobarray = len(jobparams) > 1 and 'csm_cache' in kw
    try:
        eng.poll_queue(job_id, poll_time=15)
        # wait for an empty slot or a CTRL-C
    except BaseException:
        # the job aborted even before starting
        for job_id, oqparam in jobparams:
            logs.dbcmd('finish', job_id, 'aborted')
        return jobparams
    else:
        for job_id, oqparam in jobparams:
            dic = {'status': 'executing', 'pid': eng._PID}
            if jobarray:
                dic['hazard_calculation_id'] = jobparams[0][0]
            logs.dbcmd('update_job', job_id, dic)
    try:
        if dist == 'zmq' and config.zworkers['host_cores']:
            logging.info('Asking the DbServer to start the workers')
            logs.dbcmd('zmq_start')  # start the zworkers
            logs.dbcmd('zmq_wait')  # wait for them to go up
        allargs = [(job_id, oqparam, exports, log_level, log_file)
                   for job_id, oqparam in jobparams]
        if jobarray:
            with start_many(eng.run_calc, allargs):
                pass
        else:
            for args in allargs:
                eng.run_calc(*args)
    finally:
        if dist == 'zmq' and config.zworkers['host_cores']:
            logging.info('Stopping the zworkers')
            logs.dbcmd('zmq_stop')
        elif dist.startswith('celery'):
            eng.celery_cleanup(config.distribution.terminate_workers_on_revoke)
    return jobparams
@classmethod
def setUpClass(cls):
    parallel.Starmap.init()  # initialize the pool
    if parallel.oq_distribute() == 'zmq':
        err = workerpool.check_status()
        if err:
            raise unittest.SkipTest(err)