def task_complete_callback(body, message):
    """
    :param dict body: ``body`` is the message sent by the task. The dict
        should contain 2 keys: `job_id` and `num_sources` (to indicate the
        number of sources computed). Both values are `int`.
    :param message: A :class:`kombu.transport.pyamqplib.Message`, which
        contains metadata about the message (including content type,
        channel, etc.). See kombu docs for more details.
    """
    job_id = body['job_id']
    num_sources = body['num_sources']

    assert job_id == job.id
    progress['computed'] += num_sources

    logs.log_percent_complete(job_id, "hazard")

    # Once we receive a completion signal, enqueue the next
    # piece of work (if there's anything left to be done).
    try:
        self.core_calc_task.apply_async(task_gen.next())
    except StopIteration:
        # There are no more tasks to dispatch; now we just need
        # to wait until all tasks signal completion.
        pass

    message.ack()

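# A minimal sketch of how a callback like the one above might be wired to a
# kombu consumer. The broker URL and the exchange/queue names here are
# assumptions for illustration only; the real routing lives elsewhere in the
# codebase.
from kombu import Connection, Exchange, Queue, Consumer

def drain_task_signals(callback, timeout=None):
    # Hypothetical exchange/queue names -- not taken from the source.
    exchange = Exchange("oq.tasks", type="direct")
    queue = Queue("oq.tasks.control", exchange, routing_key="control")
    with Connection("amqp://guest:guest@localhost//") as conn:
        with Consumer(conn, queues=[queue], callbacks=[callback]):
            # Blocks until a message arrives; the callback is expected
            # to message.ack() once it has processed the body.
            conn.drain_events(timeout=timeout)
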
def execute(self):
    """Entry point to trigger the computation."""
    random_generator = java.jclass("Random")(
        int(self.job_ctxt.params["GMF_RANDOM_SEED"]))
    encoder = json.JSONEncoder()
    kvs_client = kvs.get_client()

    num_calculations = self._number_of_calculations()
    self.initialize_pr_data(num_calculations=num_calculations)

    for cnum in xrange(num_calculations):
        try:
            gmf = self.compute_ground_motion_field(random_generator)
            stats.pk_inc(self.job_ctxt.job_id, "nhzrd_done", 1)
        except Exception:
            # Count the failure before re-raising.
            stats.pk_inc(self.job_ctxt.job_id, "nhzrd_failed", 1)
            raise
        logs.log_percent_complete(self.job_ctxt.job_id, "hazard")
        imt = self.job_ctxt.params["INTENSITY_MEASURE_TYPE"]
        self._serialize_gmf(gmf, imt, cnum)

        for gmv in gmf_to_dict(gmf, imt):
            site = shapes.Site(gmv["site_lon"], gmv["site_lat"])
            key = kvs.tokens.ground_motion_values_key(
                self.job_ctxt.job_id, site)
            kvs_client.rpush(key, encoder.encode(gmv))

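# The loop above pushes one JSON document per ground-motion value onto a
# Redis list. A minimal sketch of reading those values back, assuming
# kvs.get_client() returns a redis-py client (lrange is the standard
# redis-py call for fetching a whole list):
import json

def ground_motion_values(job_id, site):
    kvs_client = kvs.get_client()
    key = kvs.tokens.ground_motion_values_key(job_id, site)
    # Fetch and decode every GMV stored for this job/site pair.
    return [json.loads(raw) for raw in kvs_client.lrange(key, 0, -1)]
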
def execute(self):
    """Main hazard processing block.

    Loops through various random realizations, spawning tasks to compute
    GMFs."""
    source_model_generator = random.Random()
    source_model_generator.seed(
        self.job_ctxt['SOURCE_MODEL_LT_RANDOM_SEED'])

    gmpe_generator = random.Random()
    gmpe_generator.seed(self.job_ctxt['GMPE_LT_RANDOM_SEED'])

    gmf_generator = random.Random()
    gmf_generator.seed(self.job_ctxt['GMF_RANDOM_SEED'])

    histories = self.job_ctxt['NUMBER_OF_SEISMICITY_HISTORIES']
    realizations = self.job_ctxt['NUMBER_OF_LOGIC_TREE_SAMPLES']
    self.initialize_pr_data(num_calculations=histories * realizations)

    LOG.info(
        "Going to run hazard for %s histories of %s realizations each."
        % (histories, realizations))

    for i in range(0, histories):
        pending_tasks = []
        for j in range(0, realizations):
            self.store_source_model(source_model_generator.getrandbits(32))
            self.store_gmpe_map(gmpe_generator.getrandbits(32))
            pending_tasks.append(
                compute_ground_motion_fields.delay(
                    self.job_ctxt.job_id,
                    self.job_ctxt.sites_to_compute(), i,
                    realization=j, seed=gmf_generator.getrandbits(32)))

        for each_task in pending_tasks:
            each_task.wait()
            if each_task.status != 'SUCCESS':
                raise Exception(each_task.result)
            logs.log_percent_complete(self.job_ctxt.job_id, "hazard")

        for j in range(0, realizations):
            stochastic_set_key = kvs.tokens.stochastic_set_key(
                self.job_ctxt.job_id, i, j)
            LOG.info("Writing output for ses %s" % stochastic_set_key)
            ses = kvs.get_value_json_decoded(stochastic_set_key)
            if ses:
                self.serialize_gmf(ses)

def test_log_percent_complete_with_invalid_area(self):
    # nothing is reported, -1 is returned
    job_id = 11
    with mock.patch("openquake.logs.log_progress") as lpm:
        rv = logs.log_percent_complete(job_id, "invalid calculation")
        self.assertEqual(-1, rv)
        self.assertEqual(0, lpm.call_count)

def test_log_percent_complete_with_zero_percent_done(self):
    # nothing is reported since the percentage complete value is zero
    job_id = 13
    stats.pk_set(job_id, "nhzrd_total", 100)
    stats.pk_set(job_id, "nhzrd_done", 0)
    stats.pk_set(job_id, "lvr", -1)
    with mock.patch("openquake.logs.log_progress") as lpm:
        rv = logs.log_percent_complete(job_id, "hazard")
        self.assertEqual(0, rv)
        self.assertEqual(0, lpm.call_count)

def test_log_percent_complete_with_almost_same_percentage_value(self):
    # nothing new is reported when the percentage complete value is almost
    # the same as the last value reported (12.6 versus 12).
    job_id = 12
    stats.pk_set(job_id, "nhzrd_total", 366)
    stats.pk_set(job_id, "nhzrd_done", 46)
    stats.pk_set(job_id, "lvr", 12)
    with mock.patch("openquake.logs.log_progress") as lpm:
        rv = logs.log_percent_complete(job_id, "hazard")
        self.assertEqual(12, rv)
        self.assertEqual(0, lpm.call_count)

def test_log_percent_complete_with_new_percentage_value(self):
    # the percentage complete is reported since it exceeds the last value
    # reported
    job_id = 14
    stats.pk_set(job_id, "nhzrd_total", 100)
    stats.pk_set(job_id, "nhzrd_done", 20)
    stats.pk_set(job_id, "lvr", 12)
    with mock.patch("openquake.logs.log_progress") as lpm:
        rv = logs.log_percent_complete(job_id, "hazard")
        self.assertEqual(20, rv)
        self.assertEqual(1, lpm.call_count)
        self.assertEqual("hazard 20% complete",
                         lpm.call_args_list[0][0][0])

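# The four tests above pin down the observable behaviour of
# logs.log_percent_complete. What follows is a hypothetical reconstruction
# inferred from those tests (it assumes a stats.pk_get counterpart to the
# stats.pk_set used above); it is not the actual implementation:
def log_percent_complete(job_id, ctype):
    # Only known calculation types are tracked; anything else returns -1.
    # ("risk" as a second valid type is an assumption.)
    if ctype not in ("hazard", "risk"):
        return -1

    # Counter names match the hazard tests above; other calculation types
    # presumably use their own prefix.
    total = stats.pk_get(job_id, "nhzrd_total")
    done = stats.pk_get(job_id, "nhzrd_done")
    last_reported = stats.pk_get(job_id, "lvr")

    percent = done * 100 / total  # integer floor, e.g. 46/366 -> 12
    # Report only when the percentage has moved past both zero and the
    # last value reported, then remember the new value.
    if percent > 0 and percent > last_reported:
        log_progress("%s %s%% complete" % (ctype, percent))
        stats.pk_set(job_id, "lvr", percent)
    return percent
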
def uhs_task_handler(job_id, num_tasks, start_count):
    """Async task handler for counting calculation results and determining
    when a batch of tasks is complete.

    This function periodically polls the task counters in Redis and blocks
    until the current block of tasks is finished.

    :param int job_id: The ID of the currently running job.
    :param int num_tasks: The number of tasks in the current block.
    :param int start_count: The number of tasks completed so far in the job.
    """
    remaining_gen = remaining_tasks_in_block(job_id, num_tasks, start_count)
    while True:
        time.sleep(0.5)
        try:
            remaining_gen.next()
        except StopIteration:
            # No more tasks remaining in this batch.
            break
        logs.log_percent_complete(job_id, "hazard")

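# A minimal sketch of what the remaining_tasks_in_block generator could look
# like, inferred purely from the handler's usage above (it yields while work
# remains and raises StopIteration when the block is done). The body,
# including the stats.pk_get accessor, is an assumption, not the source:
def remaining_tasks_in_block(job_id, num_tasks, start_count):
    while True:
        done = stats.pk_get(job_id, "nhzrd_done") - start_count
        if done >= num_tasks:
            return  # block finished; the caller sees StopIteration
        yield num_tasks - done
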
def distribute_disagg(self, sites, realizations, poes, result_dir):
    """Compute disaggregation by splitting up the calculation over sites,
    realizations, and PoE values.

    :param sites: List of :class:`openquake.shapes.Site` objects
    :param int realizations: Number of logic tree samples to compute
    :param poes: Probability of Exceedence levels for the calculation
    :type poes: List of floats
    :param result_dir: Path where full disaggregation results should be
        stored
    :returns: Result data in the following form::

        [(realization_1, poe_1,
          [(site_1, gmv_1, matrix_path_1),
           (site_2, gmv_2, matrix_path_2)]
         ),
         (realization_1, poe_2,
          [(site_1, gmv_1, matrix_path_3),
           (site_2, gmv_2, matrix_path_4)]
         ),
         ...
         (realization_N, poe_N,
          [(site_1, gmv_1, matrix_path_N-1),
           (site_2, gmv_2, matrix_path_N)]
         ),
        ]

        A single matrix result in this form looks like this::

        [(1, 0.1,
          [(Site(0.0, 0.0), 0.2257,
            '/var/lib/openquake/disagg-results/job-372/some_guid.h5'),]
         ),
        ]
    """
    # accumulates the final results of this method
    full_da_results = []

    # accumulates task data across the realization and poe loops
    task_data = []

    src_model_rnd = random.Random()
    src_model_rnd.seed(self.job_ctxt['SOURCE_MODEL_LT_RANDOM_SEED'])
    gmpe_rnd = random.Random()
    gmpe_rnd.seed(self.job_ctxt['GMPE_LT_RANDOM_SEED'])

    for rlz in xrange(1, realizations + 1):  # 1 to N, inclusive
        # cache the source model and gmpe model in the KVS
        # so the Java code can access it
        general.store_source_model(
            self.job_ctxt.job_id, src_model_rnd.getrandbits(32),
            self.job_ctxt.params, self.calc)
        general.store_gmpe_map(
            self.job_ctxt.job_id, gmpe_rnd.getrandbits(32), self.calc)
        for poe in poes:
            task_site_pairs = []
            for site in sites:
                a_task = compute_disagg_matrix_task.delay(
                    self.job_ctxt.job_id, rlz, poe, result_dir, site=site)
                task_site_pairs.append((a_task, site))
            task_data.append((rlz, poe, task_site_pairs))

    for rlz, poe, task_site_pairs in task_data:
        # accumulates all data for a given (realization, poe) pair
        rlz_poe_data = []
        for a_task, site in task_site_pairs:
            a_task.wait()
            if not a_task.successful():
                msg = (
                    "Full Disaggregation matrix computation task"
                    " for job %s with task_id=%s, realization=%s, PoE=%s,"
                    " site=%s has failed with the following error: %s")
                msg %= (self.job_ctxt.job_id, a_task.task_id, rlz, poe,
                        site, a_task.result)
                LOG.critical(msg)
                raise RuntimeError(msg)
            else:
                gmv, matrix_path = a_task.result
                rlz_poe_data.append((site, gmv, matrix_path))
            logs.log_percent_complete(self.job_ctxt.job_id, "hazard")

        full_da_results.append((rlz, poe, rlz_poe_data))

    return full_da_results

def ath(self, sites, rtype, datum=None):
    """
    Write calculation results to the database.

    :param sites: the sites for which to write calculation results.
    :type sites: list of :py:class:`openquake.shapes.Site`
    :param str rtype: hazard curve type, one of: curve, mean, quantile
    :param datum: one of: realization, None, quantile
    """
    def pause_generator(value):
        """
        Returns the initial value when called for the first time and
        the double value upon each subsequent invocation.

        N.B.: the maximum value returned will never exceed 90 (seconds).
        """
        yield value
        while True:
            if value < 45:
                value *= 2
            yield value

    sites = set(sites)
    accounted_for = set()
    min_pause = 0.1
    pgen = pause_generator(min_pause)
    pause = pgen.next()

    key_template, nrml_path, hc_meta = psha_exp.hcs_meta(
        self.job_ctxt, rtype, datum)

    curve_writer = hazard_output.HazardCurveDBWriter(
        nrml_path, self.job_ctxt.job_id)

    while accounted_for != sites:
        failures = stats.failure_counters(self.job_ctxt.job_id, "h")
        if failures:
            raise RuntimeError("hazard failures (%s), aborting" % failures)
        hc_data = []
        # Sleep a little before checking the availability of additional
        # hazard curve results.
        time.sleep(pause)
        results_found = 0
        for site in sites:
            if site in accounted_for:
                continue
            value = kvs.get_value_json_decoded(key_template % hash(site))
            if value is None:
                # No value yet, proceed to next site.
                continue
            # Use hazard curve ordinate values (PoE) from KVS and abscissae
            # from the IML list in config.
            hc_attrib = {
                'investigationTimeSpan':
                    self.job_ctxt['INVESTIGATION_TIME'],
                'IMLValues': self.job_ctxt.imls,
                'IMT': self.job_ctxt['INTENSITY_MEASURE_TYPE'],
                'PoEValues': value}
            hc_attrib.update(hc_meta)
            hc_data.append((site, hc_attrib))
            accounted_for.add(site)
            results_found += 1
        if not results_found:
            # No results found, increase the sleep pause.
            pause = pgen.next()
        else:
            curve_writer.serialize(hc_data)
            pause *= 0.8
            pause = min_pause if pause < min_pause else pause
        logs.log_percent_complete(self.job_ctxt.job_id, "hazard")
    return nrml_path

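# For reference, the back-off sequence produced by pause_generator above:
# the value doubles while it is below 45, so with min_pause = 0.1 it tops
# out at 51.2 seconds, safely under the 90-second bound the docstring
# promises.
pgen = pause_generator(0.1)
print [round(pgen.next(), 1) for _ in xrange(11)]
# -> [0.1, 0.2, 0.4, 0.8, 1.6, 3.2, 6.4, 12.8, 25.6, 51.2, 51.2]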