def test_timeout_without_job_cancellation(self):
    """When the Spark version cannot cancel jobs by job-group ID, a timed-out
    fmin must mark itself cancelled, stop launching new trials, and block
    until already-running trials finish on their own.
    """
    timeout = 4
    spark_trials = SparkTrials(parallelism=1, timeout=timeout)
    # Force the no-cancellation code path regardless of the Spark version.
    spark_trials._spark_supports_job_cancelling = False

    def fn(x):
        time.sleep(0.5)
        return x

    with patch_logger("hyperopt-spark", logging.DEBUG) as output:
        fmin(
            fn=fn,
            space=hp.uniform("x", -1, 1),
            algo=anneal.suggest,
            max_evals=10,
            trials=spark_trials,
            max_queue_len=1,
            show_progressbar=False,
            return_argmin=False,
        )
        logs = output.getvalue().strip()

    # The run must have been cut short by the timeout.
    self.assertTrue(spark_trials._fmin_cancelled)
    self.assertEqual(spark_trials._fmin_cancelled_reason, "fmin run timeout")
    # Some trials completed before the timeout; the rest were cancelled.
    self.assertGreater(spark_trials.count_successful_trials(), 0)
    self.assertGreater(spark_trials.count_cancelled_trials(), 0)
    self.assertIn(
        "fmin is cancelled, so new trials will not be launched",
        logs,
        """ "fmin is cancelled, so new trials will not be launched" missing from log: {log_output}""".format(log_output=logs),
    )
    # Without job cancellation support, fmin logs that it will block on
    # running trials instead of killing them.
    self.assertIn(
        "SparkTrials will block",
        logs,
        """ "SparkTrials will block" missing from log: {log_output}""".format(log_output=logs),
    )
    self.assert_task_succeeded(logs, 0)
def test_timeout_with_job_cancellation(self):
    """When the Spark version supports cancelling jobs by job-group ID, a
    timed-out fmin must cancel running trials (their tasks never complete)
    and still report any trials that finished before the timeout.
    """
    if not self.sparkSupportsJobCancelling():
        print(
            "Skipping timeout test since this Apache PySpark version does not "
            "support cancelling jobs by job group ID.")
        return

    timeout = 2
    spark_trials = SparkTrials(parallelism=4, timeout=timeout)

    def fn(x):
        # Negative x simulates a hung task: it sleeps past the timeout and
        # would raise if cancellation failed to kill it.
        if x < 0:
            time.sleep(timeout + 20)
            raise Exception("Task should have been cancelled")
        else:
            time.sleep(1)
            return x

    # Test 1 cancelled trial. Examine logs.
    with patch_logger("hyperopt-spark", logging.DEBUG) as output:
        fmin(
            fn=fn,
            space=hp.uniform("x", -2, 0),  # always negative -> always hangs
            algo=anneal.suggest,
            max_evals=1,
            trials=spark_trials,
            max_queue_len=1,
            show_progressbar=False,
            return_argmin=False,
            rstate=np.random.RandomState(4),  # deterministic sampling
        )
        log_output = output.getvalue().strip()

    self.assertTrue(spark_trials._fmin_cancelled)
    self.assertEqual(spark_trials._fmin_cancelled_reason, "fmin run timeout")
    self.assertEqual(spark_trials.count_cancelled_trials(), 1)
    self.assertIn(
        "Cancelling all running jobs",
        log_output,
        """ "Cancelling all running jobs" missing from log: {log_output}""".format(log_output=log_output),
    )
    self.assertIn(
        "trial task 0 cancelled",
        log_output,
        """ "trial task 0 cancelled" missing from log: {log_output}""".format(log_output=log_output),
    )
    # The hung task must have been killed before it could raise.
    self.assertNotIn(
        "Task should have been cancelled",
        log_output,
        """ "Task should have been cancelled" should not in log: {log_output}""".format(log_output=log_output),
    )
    self.assert_task_failed(log_output, 0)

    # Test mix of successful and cancelled trials.
    spark_trials = SparkTrials(parallelism=4, timeout=4)
    fmin(
        fn=fn,
        space=hp.uniform("x", -0.25, 5),  # mostly positive -> mostly succeeds
        algo=anneal.suggest,
        max_evals=6,
        trials=spark_trials,
        max_queue_len=1,
        show_progressbar=False,
        return_argmin=True,
        rstate=np.random.RandomState(4),
    )
    time.sleep(2)
    self.assertTrue(spark_trials._fmin_cancelled)
    self.assertEqual(spark_trials._fmin_cancelled_reason, "fmin run timeout")
    # There are 2 finished trials, 1 cancelled running trial and 1 cancelled
    # new trial. We do not need to check the new trial since it is not
    # started yet.
    self.assertGreaterEqual(
        spark_trials.count_successful_trials(),
        1,
        "Expected at least 1 successful trial but found none.",
    )
    self.assertGreaterEqual(
        spark_trials.count_cancelled_trials(),
        1,
        "Expected at least 1 cancelled trial but found none.",
    )