def test_process_spark_submit_log_k8s(self): # Given hook = SparkSubmitHook(conn_id='spark_k8s_cluster') log_lines = [ 'INFO LoggingPodStatusWatcherImpl:54 - State changed, new state:' + 'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' + 'namespace: default' + 'labels: spark-app-selector -> spark-465b868ada474bda82ccb84ab2747fcd,' + 'spark-role -> driver' + 'pod uid: ba9c61f6-205f-11e8-b65f-d48564c88e42' + 'creation time: 2018-03-05T10:26:55Z' + 'service account name: spark' + 'volumes: spark-init-properties, download-jars-volume,' + 'download-files-volume, spark-token-2vmlm' + 'node name: N/A' + 'start time: N/A' + 'container images: N/A' + 'phase: Pending' + 'status: []' + '2018-03-05 11:26:56 INFO LoggingPodStatusWatcherImpl:54 - State changed,' + ' new state:' + 'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' + 'namespace: default' + 'Exit code: 999' ] # When hook._process_spark_submit_log(log_lines) # Then self.assertEqual(hook._kubernetes_driver_pod, 'spark-pi-edf2ace37be7353a958b38733a12f8e6-driver') self.assertEqual(hook._spark_exit_code, 999)
def test_yarn_process_on_kill(self, mock_popen): # Given mock_popen.return_value.stdout = six.StringIO('stdout') mock_popen.return_value.stderr = six.StringIO('stderr') mock_popen.return_value.poll.return_value = None mock_popen.return_value.wait.return_value = 0 log_lines = [ 'SPARK_MAJOR_VERSION is set to 2, using Spark2', 'WARN NativeCodeLoader: Unable to load native-hadoop library for your ' + 'platform... using builtin-java classes where applicable', 'WARN DomainSocketFactory: The short-circuit local reads feature cannot ' + 'be used because libhadoop cannot be loaded.', 'INFO Client: Requesting a new application from cluster with 10 ' + 'NodeManagerapplication_1486558679801_1820s', 'INFO Client: Submitting application application_1486558679801_1820 ' + 'to ResourceManager' ] hook = SparkSubmitHook(conn_id='spark_yarn_cluster') hook._process_spark_submit_log(log_lines) hook.submit() # When hook.on_kill() # Then self.assertIn(call(['yarn', 'application', '-kill', 'application_1486558679801_1820'], stderr=-1, stdout=-1), mock_popen.mock_calls)
def test_process_spark_submit_log_standalone_cluster(self): # Given hook = SparkSubmitHook(conn_id='spark_standalone_cluster') log_lines = [ 'Running Spark using the REST application submission protocol.', '17/11/28 11:14:15 INFO RestSubmissionClient: Submitting a request ' 'to launch an application in spark://spark-standalone-master:6066', '17/11/28 11:14:15 INFO RestSubmissionClient: Submission successfully ' + 'created as driver-20171128111415-0001. Polling submission state...' ] # When hook._process_spark_submit_log(log_lines) # Then self.assertEqual(hook._driver_id, 'driver-20171128111415-0001')
def test_process_spark_submit_log_yarn(self): # Given hook = SparkSubmitHook(conn_id='spark_yarn_cluster') log_lines = [ 'SPARK_MAJOR_VERSION is set to 2, using Spark2', 'WARN NativeCodeLoader: Unable to load native-hadoop library for your ' + 'platform... using builtin-java classes where applicable', 'WARN DomainSocketFactory: The short-circuit local reads feature cannot ' 'be used because libhadoop cannot be loaded.', 'INFO Client: Requesting a new application from cluster with 10 NodeManagers', 'INFO Client: Submitting application application_1486558679801_1820 ' + 'to ResourceManager' ] # When hook._process_spark_submit_log(log_lines) # Then self.assertEqual(hook._yarn_application_id, 'application_1486558679801_1820')
def test_k8s_process_on_kill(self, mock_popen, mock_client_method): # Given mock_popen.return_value.stdout = six.StringIO('stdout') mock_popen.return_value.stderr = six.StringIO('stderr') mock_popen.return_value.poll.return_value = None mock_popen.return_value.wait.return_value = 0 client = mock_client_method.return_value hook = SparkSubmitHook(conn_id='spark_k8s_cluster') log_lines = [ 'INFO LoggingPodStatusWatcherImpl:54 - State changed, new state:' + 'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' + 'namespace: default' + 'labels: spark-app-selector -> spark-465b868ada474bda82ccb84ab2747fcd,' + 'spark-role -> driver' + 'pod uid: ba9c61f6-205f-11e8-b65f-d48564c88e42' + 'creation time: 2018-03-05T10:26:55Z' + 'service account name: spark' + 'volumes: spark-init-properties, download-jars-volume,' + 'download-files-volume, spark-token-2vmlm' + 'node name: N/A' + 'start time: N/A' + 'container images: N/A' + 'phase: Pending' + 'status: []' + '2018-03-05 11:26:56 INFO LoggingPodStatusWatcherImpl:54 - State changed,' + ' new state:' + 'pod name: spark-pi-edf2ace37be7353a958b38733a12f8e6-driver' + 'namespace: default' + 'Exit code: 0' ] hook._process_spark_submit_log(log_lines) hook.submit() # When hook.on_kill() # Then import kubernetes kwargs = {'pretty': True, 'body': kubernetes.client.V1DeleteOptions()} client.delete_namespaced_pod.assert_called_once_with( 'spark-pi-edf2ace37be7353a958b38733a12f8e6-driver', 'mynamespace', **kwargs)
def test_standalone_cluster_process_on_kill(self): # Given log_lines = [ 'Running Spark using the REST application submission protocol.', '17/11/28 11:14:15 INFO RestSubmissionClient: Submitting a request ' + 'to launch an application in spark://spark-standalone-master:6066', '17/11/28 11:14:15 INFO RestSubmissionClient: Submission successfully ' + 'created as driver-20171128111415-0001. Polling submission state...' ] hook = SparkSubmitHook(conn_id='spark_standalone_cluster') hook._process_spark_submit_log(log_lines) # When kill_cmd = hook._build_spark_driver_kill_command() # Then self.assertEqual(kill_cmd[0], '/path/to/spark_home/bin/spark-submit') self.assertEqual(kill_cmd[1], '--master') self.assertEqual(kill_cmd[2], 'spark://spark-standalone-master:6066') self.assertEqual(kill_cmd[3], '--kill') self.assertEqual(kill_cmd[4], 'driver-20171128111415-0001')