def test_spark_submit(self, mock_util):
    """Check the `spark_submit` property for the default and a custom value.

    `mock_util` is injected by a patch decorator outside this section; its
    `readonlyFile` stub supplies the path expected back for the custom value.
    """
    submit_task = spark.SparkStandaloneTask(const.PRIORITY_0)

    # The module default is expected to be returned unchanged.
    submit_task.spark_submit = spark.SPARK_SUBMIT
    self.assertEqual(submit_task.spark_submit, spark.SPARK_SUBMIT)

    # A custom value is expected to come back as the path from readonlyFile.
    mock_util.readonlyFile.return_value = "/tmp/other-spark-submit"
    submit_task.spark_submit = "other-spark-submit"
    self.assertEqual(submit_task.spark_submit, "/tmp/other-spark-submit")
def test_working_directory_1(self, mock_util):
    """Check the default working directory and rejection of an invalid path.

    `mock_util` is injected by a patch decorator outside this section; its
    `readwriteDirectory` stub is made to raise so the setter must fail.
    """
    task = spark.SparkStandaloneTask(const.PRIORITY_0)
    # Identity check: a fresh task is expected to have no working directory.
    self.assertIsNone(task.working_directory)

    # test setting working directory, should validate path
    mock_util.readwriteDirectory.side_effect = ValueError("Test")
    with self.assertRaises(ValueError):
        task.working_directory = "/failed/dir"
def test_run_exit_code_ok(self, mock_popen, mock_sleep):
    """Check that `run()` completes when the process ends with exit code 0.

    Both mocks are injected by patch decorators outside this section.
    """
    process = mock.Mock()
    # poll() reports no exit code twice, then exit code 0.
    process.poll.side_effect = [None, None, 0]
    mock_popen.return_value = process

    standalone_task = spark.SparkStandaloneTask(const.PRIORITY_0)
    standalone_task.run()

    # One sleep per poll result, each using the task's timeout.
    mock_sleep.assert_called_with(standalone_task.timeout)
    self.assertEqual(mock_sleep.call_count, 3)
def test_run_exit_code_fail(self, mock_popen, mock_sleep):
    """Check that `run()` raises IOError when the process exits with 127.

    Both mocks are injected by patch decorators outside this section.
    """
    process = mock.Mock()
    # poll() reports no exit code twice, then the failing exit code 127.
    process.poll.side_effect = [None, None, 127]
    mock_popen.return_value = process

    standalone_task = spark.SparkStandaloneTask(const.PRIORITY_0)
    with self.assertRaises(IOError):
        standalone_task.run()

    # Sleep is still expected to have been called with the task's timeout.
    mock_sleep.assert_called_with(standalone_task.timeout)
def test_set_application_empty(self):
    """Check the application settings after `set_application()` with no arguments."""
    task = spark.SparkStandaloneTask(const.PRIORITY_0)
    # use defaults
    task.set_application()

    self.assertEqual(task.name, spark.SPARK_APP_NAME)
    # None is checked by identity rather than equality.
    self.assertIsNone(task.main_class)
    self.assertIsNone(task.jar)
    self.assertEqual(task.spark_options, {})
    self.assertEqual(task.job_options, [])
def test_web_url(self):
    """Check that `web_url` accepts valid URLs and rejects None and "abc"."""
    url_task = spark.SparkStandaloneTask(const.PRIORITY_0)

    # The module default is expected to be returned unchanged.
    url_task.web_url = spark.SPARK_WEB_URL
    self.assertEqual(url_task.web_url, spark.SPARK_WEB_URL)

    # Each of these assignments must raise.
    for invalid_url in (None, "abc"):
        with self.assertRaises(StandardError):
            url_task.web_url = invalid_url

    valid_url = "http://1.1.1.1:8000"
    url_task.web_url = valid_url
    self.assertEqual(url_task.web_url, valid_url)
def test_set_application_no_options(self, mock_util):
    """Check `set_application()` with name, class and jar but no options.

    `mock_util` is injected by a patch decorator outside this section; its
    `readonlyFile` stub supplies the path expected back for the jar.
    """
    app_task = spark.SparkStandaloneTask(const.PRIORITY_0)

    # set application options
    mock_util.readonlyFile.return_value = "/tmp/file.jar"
    app_task.set_application(name="test", main_class="Class", jar="file.jar")

    self.assertEqual(app_task.name, "test")
    self.assertEqual(app_task.main_class, "Class")
    self.assertEqual(app_task.jar, "/tmp/file.jar")
    # Options that were not passed are expected to be empty.
    self.assertEqual(app_task.spark_options, {})
    self.assertEqual(app_task.job_options, [])
def test_serde(self):
    """Check that a task keeps its attributes across a cPickle round trip."""
    original = spark.SparkStandaloneTask(const.PRIORITY_0)
    restored = cPickle.loads(cPickle.dumps(original))

    # Attributes compared between the restored and the original task, in order.
    compared_attributes = (
        "priority",
        "working_directory",
        "name",
        "main_class",
        "jar",
        "spark_options",
        "job_options",
    )
    for attribute in compared_attributes:
        self.assertEqual(getattr(restored, attribute), getattr(original, attribute))
def test_launch_process_no_stdout(self, mock_popen):
    """Check the Popen call made by `launch_process()` with no working directory.

    `mock_popen` is injected by a patch decorator outside this section.
    """
    mock_popen.Popen = mock.Mock()  # redefine Popen object

    launched_task = spark.SparkStandaloneTask(const.PRIORITY_0)
    launched_task.working_directory = None
    launched_task.launch_process()

    # With no working directory, both output streams are expected to be None.
    expected_kwargs = dict(bufsize=4096, stdout=None, stderr=None, close_fds=True)
    mock_popen.Popen.assert_called_with(launched_task.cmd(), **expected_kwargs)
def test_init(self):
    """Check the attribute values of a freshly constructed task."""
    task = spark.SparkStandaloneTask(const.PRIORITY_0)

    self.assertEqual(task.priority, const.PRIORITY_0)
    # The private fields are expected to hold the module-level defaults.
    self.assertEqual(task._spark_submit, spark.SPARK_SUBMIT)
    self.assertEqual(task._master_url, spark.SPARK_MASTER_URL)
    self.assertEqual(task._web_url, spark.SPARK_WEB_URL)
    self.assertEqual(task.name, spark.SPARK_APP_NAME)
    self.assertEqual(task.spark_options, {})
    # None is checked by identity rather than equality.
    self.assertIsNone(task.main_class)
    self.assertIsNone(task.jar)
    self.assertEqual(task.job_options, [])
def test_cancel_with_ps_no_exitcode(self, mock_time, mock_popen):
    """Check `cancel()` on a launched process whose poll reports exit code 127.

    Expects the exit code to be returned, `terminate()` to be called, and
    `kill()` not to be called. Both mocks are injected by patch decorators
    outside this section.
    """
    mock_time.sleep.return_value = None  # remove sleep function
    mock_popen.Popen = mock.Mock()  # redefine Popen object

    task = spark.SparkStandaloneTask(const.PRIORITY_0)
    task.working_directory = None
    task.launch_process()
    task._current_ps().poll.return_value = 127

    exit_code = task.cancel()

    self.assertEqual(exit_code, 127)
    task._current_ps().terminate.assert_called_with()
    # `assert_has_calls([])` passes however many times kill() ran, so count
    # the calls explicitly to verify that kill() was never invoked.
    # NOTE(review): assumes cancel() skips kill() once poll() returns an exit
    # code - confirm against the implementation.
    self.assertEqual(task._current_ps().kill.call_count, 0)
def test_set_application_with_options(self):
    """Check how `set_application()` stores Spark options and job options."""
    app_task = spark.SparkStandaloneTask(const.PRIORITY_0)

    requested_spark_options = {"spark.a": 1, "b": 2}
    requested_job_options = ["a", "b", 3]
    app_task.set_application(
        name="test",
        spark_options=requested_spark_options,
        job_options=requested_job_options,
    )

    self.assertEqual(app_task.name, "test")
    # Only the "spark.a" entry is expected to remain, with a string value.
    self.assertEqual(app_task.spark_options, {"spark.a": "1"})
    # Every job option is expected back as a string.
    self.assertEqual(app_task.job_options, ["a", "b", "3"])
def test_master_url(self):
    """Check that `master_url` accepts Spark master URLs and rejects others."""
    url_task = spark.SparkStandaloneTask(const.PRIORITY_0)

    # The module default is expected to be returned unchanged.
    url_task.master_url = spark.SPARK_MASTER_URL
    self.assertEqual(url_task.master_url, spark.SPARK_MASTER_URL)

    # The web URL constant must be rejected with ValueError.
    with self.assertRaises(ValueError):
        url_task.master_url = spark.SPARK_WEB_URL

    # Each of these assignments must raise.
    for invalid_url in (None, "abc"):
        with self.assertRaises(StandardError):
            url_task.master_url = invalid_url

    valid_url = "spark://sandbox:7077"
    url_task.master_url = valid_url
    self.assertEqual(url_task.master_url, valid_url)
def test_cmd(self, mock_util):
    """Check the argument list built by `cmd()` for default and custom tasks.

    `mock_util` is injected by a patch decorator outside this section.
    """
    # Leading arguments expected in both commands below.
    common_prefix = [
        spark.SPARK_SUBMIT,
        "--master", spark.SPARK_MASTER_URL,
        "--name", spark.SPARK_APP_NAME,
    ]

    # should create cmd from default task
    default_task = spark.SparkStandaloneTask(const.PRIORITY_0)
    expected_default = common_prefix + ["--class", "None", "None"]
    self.assertEqual(default_task.cmd(), expected_default)

    # should create cmd with arbitrary options
    mock_util.readonlyFile.return_value = "/tmp/file"
    custom_task = spark.SparkStandaloneTask(const.PRIORITY_0)
    custom_task.spark_options = {"spark.a": 1}
    custom_task.job_options = ["a", "b"]
    custom_task.main_class = "Class"
    custom_task.jar = "jar"
    expected_custom = common_prefix + [
        "--conf", "spark.a=1",
        "--class", "Class",
        "jar",
        "a", "b",
    ]
    self.assertEqual(custom_task.cmd(), expected_custom)
def test_launch_process_with_stdout(self, mock_util_open, mock_rw_dir, mock_popen):
    """Check the Popen call and opened files when a working directory is set.

    All three mocks are injected by patch decorators outside this section.
    """
    mock_popen.Popen = mock.Mock()  # redefine Popen object
    mock_util_open.return_value = "stream"
    mock_rw_dir.return_value = "/tmp/work"

    launched_task = spark.SparkStandaloneTask(const.PRIORITY_0)
    launched_task.working_directory = "work"
    launched_task.launch_process()

    # Both output streams are expected to be the value returned by the open stub.
    expected_kwargs = dict(bufsize=4096, stdout="stream", stderr="stream", close_fds=True)
    mock_popen.Popen.assert_called_with(launched_task.cmd(), **expected_kwargs)

    # One binary-write open per output file under the mocked directory.
    expected_opens = [
        mock.call("/tmp/work/stdout", "wb"),
        mock.call("/tmp/work/stderr", "wb"),
    ]
    mock_util_open.assert_has_calls(expected_opens)
def test_cancel_no_ps(self):
    """Check that `cancel()` returns None when no process was launched."""
    task = spark.SparkStandaloneTask(const.PRIORITY_0)
    # None is checked by identity rather than equality.
    self.assertIsNone(task.cancel())
def test_working_directory_2(self, mock_util):
    """Check that `working_directory` returns the path from `readwriteDirectory`.

    `mock_util` is injected by a patch decorator outside this section.
    """
    resolved_directory = "/tmp/work"
    dir_task = spark.SparkStandaloneTask(const.PRIORITY_0)

    # should set fully resolved path
    mock_util.readwriteDirectory.return_value = resolved_directory
    dir_task.working_directory = "work"
    self.assertEqual(dir_task.working_directory, resolved_directory)
def test_working_directory_3(self):
    """Check that `working_directory` can be explicitly set to None."""
    task = spark.SparkStandaloneTask(const.PRIORITY_0)
    task.working_directory = None
    # None is checked by identity rather than equality.
    self.assertIsNone(task.working_directory)
def test_run_no_ps(self, mock_sleep):
    """Check that `run()` raises AttributeError when the launch is stubbed out.

    `mock_sleep` is injected by a patch decorator outside this section.
    """
    stubbed_task = spark.SparkStandaloneTask(const.PRIORITY_0)
    # Replace the launch step with a no-op mock; presumably this leaves the
    # task without a process object for run() to poll.
    stubbed_task.launch_process = mock.Mock()

    with self.assertRaises(AttributeError):
        stubbed_task.run()

    # Sleep is still expected to have been called with the task's timeout.
    mock_sleep.assert_called_with(stubbed_task.timeout)
def test_priority(self):
    """Check that a task reports the priority it was constructed with."""
    expected_priority = const.PRIORITY_0
    prioritized_task = spark.SparkStandaloneTask(expected_priority)
    self.assertEqual(prioritized_task.priority, expected_priority)