def test_delete_job_counters_deletes_counters_for_job(self):
    """
    The progress indication counters for a given job are deleted.
    """
    kvs = self.connect()
    args = [(55, "h", "a/b/c"), (55, "h", "d/e/f")]
    for data in args:
        stats.incr_counter(*data)
    stats.delete_job_counters(55)
    self.assertEqual(0, len(kvs.keys("oqs:55:*")))
def test_incr_counter(self):
    """
    The counter is incremented for the given key.
    """
    args = (44, "h", "d/x/z", "i")
    kvs = self.connect()
    key = stats.key_name(*args)
    previous_value = kvs.get(key)
    previous_value = int(previous_value) if previous_value else 0
    stats.incr_counter(*args[:-1])
    value = int(kvs.get(key))
    self.assertEqual(1, (value - previous_value))
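# A minimal sketch of what `stats.key_name` and `stats.incr_counter` are
# assumed to do, inferred from the key patterns these tests exercise. This
# is an illustration only, not the actual `openquake.utils.stats`
# implementation; the real key layout may differ.
import redis

def key_name(job_id, area, name, counter_type="i"):
    """Build the redis key for a job/area/counter combination, e.g.
    'oqs/123/h/compute_uhs_task/i'."""
    return "oqs/%s/%s/%s/%s" % (job_id, area, name, counter_type)

def incr_counter(job_id, area, name):
    """Atomically increment the 'i'(ncrementing) counter for the job."""
    conn = redis.Redis()
    conn.incr(key_name(job_id, area, name))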
def test_delete_job_counters_resets_counters(self):
    """
    The progress indication counters for a given job are reset.
    """
    kvs = self.connect()
    args = [(66, "h", "g/h/i", "i"), (66, "h", "j/k/l", "i")]
    for data in args:
        stats.incr_counter(*data[:-1])
    stats.delete_job_counters(66)
    # The counters have been reset; after incrementing we expect them all
    # to have a value of "1".
    for data in args:
        stats.incr_counter(*data[:-1])
        self.assertEqual("1", kvs.get(stats.key_name(*data)))
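# A companion sketch of `stats.delete_job_counters`, hedged the same way:
# it presumably wipes every counter key belonging to the job, which is
# what the two tests above rely on. Note the key-pattern separator differs
# between excerpts ("oqs:55:*" above vs "oqs/123/..." further down); the
# sketch uses the slash form. Illustration only.
import redis

def delete_job_counters(job_id):
    """Remove all counters belonging to the given job."""
    conn = redis.Redis()
    keys = conn.keys("oqs/%s/*" % job_id)
    if keys:
        # redis DEL accepts multiple keys in one call
        conn.delete(*keys)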
def test_failure_counters_with_no_area(self):
    # Failure counters are returned for all computation areas if the
    # 'area' parameter is omitted.
    stats.delete_job_counters(123)
    fcname = itertools.cycle(string.ascii_lowercase)
    for cidx, carea in enumerate(["g", "h", "r"]):
        stats.incr_counter(123, carea, "%s-failures" % fcname.next())
        if not (cidx % 2):
            stats.incr_counter(123, carea, "%s-failures" % fcname.next())
    self.assertEqual(
        [('oqs/123/g/a-failures/i', 1), ('oqs/123/g/b-failures/i', 1),
         ('oqs/123/h/c-failures/i', 1), ('oqs/123/r/d-failures/i', 1),
         ('oqs/123/r/e-failures/i', 1)],
        sorted(stats.failure_counters(123)))
def test_failure_counters_with_no_area(self):
    # Failure counters are returned for all computation areas if the
    # 'area' parameter is omitted.
    stats.delete_job_counters(123)
    fcname = itertools.cycle(string.ascii_lowercase)
    for cidx, carea in enumerate(["g", "h", "r"]):
        stats.incr_counter(123, carea, "%s:failed" % fcname.next())
        if not (cidx % 2):
            stats.incr_counter(123, carea, "%s:failed" % fcname.next())
    self.assertEqual(
        [('oqs/123/g/a:failed/i', 1), ('oqs/123/g/b:failed/i', 1),
         ('oqs/123/h/c:failed/i', 1), ('oqs/123/r/d:failed/i', 1),
         ('oqs/123/r/e:failed/i', 1)],
        sorted(stats.failure_counters(123)))
def test_failure_counters_with_valid_area(self):
    # Failure counters are returned for valid computation areas.
    stats.delete_job_counters(123)
    fcname = itertools.cycle(string.ascii_lowercase)
    for cidx, carea in enumerate(["g", "h", "r"]):
        stats.incr_counter(123, carea, "%s-failures" % fcname.next())
        if not (cidx % 2):
            stats.incr_counter(123, carea, "%s-failures" % fcname.next())
    self.assertEqual(
        [('oqs/123/g/a-failures/i', 1), ('oqs/123/g/b-failures/i', 1)],
        sorted(stats.failure_counters(123, "g")))
    self.assertEqual(
        [('oqs/123/h/c-failures/i', 1)],
        sorted(stats.failure_counters(123, "h")))
    self.assertEqual(
        [('oqs/123/r/d-failures/i', 1), ('oqs/123/r/e-failures/i', 1)],
        sorted(stats.failure_counters(123, "r")))
def test_failure_counters_with_valid_area(self):
    # Failure counters are returned for valid computation areas.
    stats.delete_job_counters(123)
    fcname = itertools.cycle(string.ascii_lowercase)
    for cidx, carea in enumerate(["g", "h", "r"]):
        stats.incr_counter(123, carea, "%s:failed" % fcname.next())
        if not (cidx % 2):
            stats.incr_counter(123, carea, "%s:failed" % fcname.next())
    self.assertEqual(
        [('oqs/123/g/a:failed/i', 1), ('oqs/123/g/b:failed/i', 1)],
        sorted(stats.failure_counters(123, "g")))
    self.assertEqual(
        [('oqs/123/h/c:failed/i', 1)],
        sorted(stats.failure_counters(123, "h")))
    self.assertEqual(
        [('oqs/123/r/d:failed/i', 1), ('oqs/123/r/e:failed/i', 1)],
        sorted(stats.failure_counters(123, "r")))
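# A hedged sketch of the `stats.failure_counters(job_id, area=None)`
# helper the four tests above exercise. It is assumed to scan the job's
# counter keys, keep only the failure counters (the two test variants use
# different suffix conventions, '-failures' and ':failed'), and return
# (key, value) pairs. Not the actual implementation.
import redis

def failure_counters(job_id, area=None):
    """Return [(key, value), ...] for the job's failure counters,
    optionally restricted to one computation area."""
    conn = redis.Redis()
    pattern = "oqs/%s/%s/*" % (job_id, area or "*")
    return [(key, int(conn.get(key)))
            for key in conn.keys(pattern)
            if "-failures" in key or ":failed" in key]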
def do_curves(self, sites, realizations, serializer=None,
              the_task=tasks.compute_hazard_curve):
    """Trigger the calculation of hazard curves, serialize as requested.

    The calculated curves will only be serialized if the `serializer`
    parameter is not `None`.

    :param sites: The sites for which to calculate hazard curves.
    :type sites: list of :py:class:`openquake.shapes.Site`
    :param realizations: The number of realizations to calculate.
    :type realizations: :py:class:`int`
    :param serializer: A serializer for the calculated hazard curves,
        receives the KVS keys of the calculated hazard curves in its
        single parameter.
    :type serializer: a callable with a single parameter: list of strings
    :param the_task: The `celery` task to use for the hazard curve
        calculation; it takes the following parameters:

        * job ID
        * the sites for which to calculate the hazard curves
        * the logic tree realization number
    :type the_task: a callable taking three parameters
    :returns: KVS keys of the calculated hazard curves.
    :rtype: list of string
    """
    source_model_generator = random.Random()
    source_model_generator.seed(self["SOURCE_MODEL_LT_RANDOM_SEED"])

    gmpe_generator = random.Random()
    gmpe_generator.seed(self["GMPE_LT_RANDOM_SEED"])

    for realization in xrange(0, realizations):
        stats.incr_counter(
            self.job_id, "h", "classical:do_curves:realization")
        LOG.info("Calculating hazard curves for realization %s"
                 % realization)
        self.store_source_model(source_model_generator.getrandbits(32))
        # Draw the GMPE seed from the GMPE-specific generator.
        self.store_gmpe_map(gmpe_generator.getrandbits(32))

        utils_tasks.distribute(
            self.number_of_tasks(), the_task, ("sites", sites),
            dict(job_id=self.job_id, realization=realization),
            flatten_results=True, ath=serializer)
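# Hypothetical illustration of the `the_task` and `serializer` contracts
# documented in the docstring above: the task receives the job ID, the
# sites and the realization number; the serializer receives the list of
# KVS keys of the calculated curves. Both names below are made up for the
# example and do not exist in the codebase.
def toy_hazard_curve_task(job_id, sites, realization):
    """Pretend to compute curves; return the KVS keys of the results."""
    return ["hazard_curve!%s!%s!%s" % (job_id, realization, site)
            for site in sites]

def toy_serializer(kvs_keys):
    """Receive the KVS keys of the calculated hazard curves."""
    for key in kvs_keys:
        print "serializing %s" % key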
def test_actions_after_job_process_failures(self):
    # the job process is running but has some failure counters above zero
    # shorten the delay before checking failure counters
    supervisor.SupervisorLogMessageConsumer.FCC_DELAY = 2
    self.is_pid_running.return_value = True
    self.get_job_status.return_value = 'running'
    stats.delete_job_counters(self.job.id)
    stats.incr_counter(self.job.id, "h", "a-failures")
    stats.incr_counter(self.job.id, "r", "b-failures")
    stats.incr_counter(self.job.id, "r", "b-failures")
    supervisor.supervise(1, self.job.id, timeout=0.1)

    # the job process is terminated
    self.assertEqual(1, self.terminate_job.call_count)
    self.assertEqual(((1,), {}), self.terminate_job.call_args)

    # stop time is recorded
    self.assertEqual(1, self.record_job_stop_time.call_count)
    self.assertEqual(
        ((self.job.id,), {}), self.record_job_stop_time.call_args)

    # the cleanup is triggered
    self.assertEqual(1, self.cleanup_after_job.call_count)
    self.assertEqual(
        ((self.job.id,), {}), self.cleanup_after_job.call_args)
def test_remaining_tasks_in_block_nonzero_start_count(self):
    # Same as `test_remaining_tasks_in_block` (below), except that the
    # start_count is set to something > 0 (to simulate a mid-calculation
    # block).
    incr_count = lambda: stats.incr_counter(
        self.job_id, 'h', 'compute_uhs_task')

    # Just for variety, set 5 successful and 5 failed task counters:
    for _ in xrange(5):
        stats.incr_counter(self.job_id, 'h', 'compute_uhs_task')
    for _ in xrange(5):
        stats.incr_counter(self.job_id, 'h', 'compute_uhs_task-failures')

    # count starts at 10
    gen = remaining_tasks_in_block(self.job_id, 4, 10)

    self.assertEqual(4, gen.next())
    incr_count()
    self.assertEqual(3, gen.next())
    incr_count()
    incr_count()
    self.assertEqual(1, gen.next())
    incr_count()
    self.assertRaises(StopIteration, gen.next)
def test_remaining_tasks_in_block(self):
    # Tasks should be submitted to workers for one block (of sites) at a
    # time. For each block, we want to look at Redis counters to
    # determine when the block is finished calculating.
    #
    # `remaining_tasks_in_block` is a generator that yields the remaining
    # number of tasks in a block. When there are no more tasks left in
    # the block, a `StopIteration` is raised.
    gen = remaining_tasks_in_block(self.job_id, 4, 0)

    incr_count = lambda: stats.incr_counter(
        self.job_id, 'h', 'compute_uhs_task')

    self.assertEqual(4, gen.next())
    incr_count()
    self.assertEqual(3, gen.next())
    incr_count()
    incr_count()
    self.assertEqual(1, gen.next())
    incr_count()
    self.assertRaises(StopIteration, gen.next)
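# One way `remaining_tasks_in_block` could be written, consistent with the
# behaviour the two tests above assert. It assumes a
# `completed_task_count` helper that counts both successful and failed
# tasks (see the sketch after the last test below). A sketch, not the real
# implementation.
def remaining_tasks_in_block(job_id, block_size, start_count):
    """Yield the number of tasks still unaccounted for in the current
    block; the generator is exhausted (StopIteration) once all of them
    have completed."""
    while True:
        done = completed_task_count(job_id) - start_count
        if done >= block_size:
            return
        yield block_size - done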
def test_actions_after_job_process_failures(self):
    # the job process is running but has some failure counters above zero
    # shorten the delay before checking failure counters
    supervisor.SupervisorLogMessageConsumer.FCC_DELAY = 2
    self.is_pid_running.return_value = True
    self.get_job_status.return_value = 'running'
    stats.delete_job_counters(123)
    stats.incr_counter(123, "h", "a:failed")
    stats.incr_counter(123, "r", "b:failed")
    stats.incr_counter(123, "r", "b:failed")
    supervisor.supervise(1, 123, timeout=0.1)

    # the job process is terminated
    self.assertEqual(1, self.terminate_job.call_count)
    self.assertEqual(((1,), {}), self.terminate_job.call_args)

    # stop time is recorded
    self.assertEqual(1, self.record_job_stop_time.call_count)
    self.assertEqual(((123,), {}), self.record_job_stop_time.call_args)

    # the cleanup is triggered
    self.assertEqual(1, self.cleanup_after_job.call_count)
    self.assertEqual(((123,), {}), self.cleanup_after_job.call_args)
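# A heavily hedged sketch of the supervisor reaction asserted in the two
# tests above: once a job's failure counters rise above zero, the job
# process is terminated, the stop time is recorded and the cleanup hook is
# invoked. The helper names mirror the mocks patched in the tests (stubbed
# here so the sketch runs); the real control flow lives inside
# `supervisor.supervise` and is not shown.
def terminate_job(pid):
    print "terminating job process %s" % pid

def record_job_stop_time(job_id):
    print "recording stop time for job %s" % job_id

def cleanup_after_job(job_id):
    print "cleaning up after job %s" % job_id

def react_to_failures(pid, job_id, failure_counters):
    """Terminate and clean up if any failure counter is above zero."""
    if failure_counters:
        terminate_job(pid)
        record_job_stop_time(job_id)
        cleanup_after_job(job_id)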
def test_complete_task_count_success(self):
    stats.incr_counter(self.job_id, 'h', 'compute_uhs_task')
    self.assertEqual(1, completed_task_count(self.job_id))
def test_complete_task_count_failures(self):
    stats.incr_counter(self.job_id, 'h', 'compute_uhs_task-failures')
    self.assertEqual(1, completed_task_count(self.job_id))
def test_complete_task_count_success_and_fail(self):
    # Test `completed_task_count` with success and fail counters:
    stats.incr_counter(self.job_id, 'h', 'compute_uhs_task')
    stats.incr_counter(self.job_id, 'h', 'compute_uhs_task-failures')
    self.assertEqual(2, completed_task_count(self.job_id))
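# A minimal sketch of the `completed_task_count` helper asserted on in the
# three tests above: it is assumed to sum a task's success counter and its
# '-failures' companion, so that a failed task still counts as finished.
# The default task name and key layout are assumptions; illustration only.
import redis

def completed_task_count(job_id, task_name="compute_uhs_task"):
    """Return the number of finished (successful or failed) tasks."""
    conn = redis.Redis()
    total = 0
    for suffix in ("", "-failures"):
        value = conn.get("oqs/%s/h/%s%s/i" % (job_id, task_name, suffix))
        total += int(value) if value else 0
    return total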