def test_yarn_error(self):
    lines = [
        '2015-12-21 14:06:18,538 WARN [main]'
        ' org.apache.hadoop.mapred.YarnChild: Exception running child'
        ' : java.lang.RuntimeException: PipeMapRed.waitOutputThreads():'
        ' subprocess failed with code 1\n',
        ' at org.apache.hadoop.streaming.PipeMapRed'
        '.waitOutputThreads(PipeMapRed.java:322)\n',
        ' at org.apache.hadoop.streaming.PipeMapRed'
        '.mapRedFinished(PipeMapRed.java:535)\n',
    ]

    self.assertEqual(
        _parse_task_syslog(lines),
        dict(hadoop_error=dict(
            message=(
                'Exception running child : java.lang.RuntimeException:'
                ' PipeMapRed.waitOutputThreads():'
                ' subprocess failed with code 1\n'
                ' at org.apache.hadoop.streaming.PipeMapRed'
                '.waitOutputThreads(PipeMapRed.java:322)\n'
                ' at org.apache.hadoop.streaming.PipeMapRed'
                '.mapRedFinished(PipeMapRed.java:535)'),
            num_lines=3,
            start_line=0,
        )))

def test_spark_executor_exception(self):
    lines = [
        '16/11/16 22:05:00 ERROR Executor: Exception in task 0.2 in stage'
        ' 0.0 (TID 4)'
        ' org.apache.spark.api.python.PythonException: Traceback (most'
        ' recent call last):\n',
        ' File "/mnt/yarn/usercache/hadoop/appcache/application'
        '_1479325434015_0003/container_1479325434015_0003_02_000002/'
        'pyspark.zip/pyspark/worker.py", line 111, in main process()\n'
        'Exception: KABOOM\n',
        '\n',
        ' at org.apache.spark.api.python.PythonRunner$$anon$1.read'
        '(PythonRDD.scala:166)\n',
    ]

    self.assertEqual(
        _parse_task_syslog(lines),
        dict(hadoop_error=dict(
            message=(
                'Exception in task 0.2 in stage'
                ' 0.0 (TID 4)'
                ' org.apache.spark.api.python.PythonException:'
                ' Traceback (most recent call last):\n'
                ' File "/mnt/yarn/usercache/hadoop/appcache/'
                'application_1479325434015_0003/container'
                '_1479325434015_0003_02_000002/pyspark.zip/pyspark/'
                'worker.py", line 111, in main process()\n'
                'Exception: KABOOM\n'
                '\n'
                ' at org.apache.spark.api.python.PythonRunner'
                '$$anon$1.read(PythonRDD.scala:166)'),
            num_lines=4,
            start_line=0,
        )))

def test_opening_file(self):
    lines = [
        '2010-07-27 17:54:54,344 INFO'
        ' org.apache.hadoop.fs.s3native.NativeS3FileSystem (main):'
        " Opening 's3://yourbucket/logs/2010/07/23/log2-00077.gz'"
        ' for reading\n'
    ]

    self.assertEqual(
        _parse_task_syslog(lines),
        dict(split=dict(
            path='s3://yourbucket/logs/2010/07/23/log2-00077.gz')))

def test_split(self):
    lines = [
        '2015-12-21 14:06:17,707 INFO [main]'
        ' org.apache.hadoop.mapred.MapTask: Processing split:'
        ' hdfs://e4270474c8ee:9000/user/root/tmp/mrjob'
        '/mr_boom.root.20151221.190511.059097/files/bootstrap.sh:0+335\n',
    ]

    self.assertEqual(
        _parse_task_syslog(lines),
        dict(split=dict(
            path=('hdfs://e4270474c8ee:9000/user/root/tmp/mrjob'
                  '/mr_boom.root.20151221.190511.059097/files'
                  '/bootstrap.sh'),
            start_line=0,
            num_lines=335)))

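# Note on test_split: the ':0+335' suffix on the split (an offset plus a
# length) appears to be what populates start_line and num_lines in the
# expected result, with the path returned suffix-stripped; contrast
# test_opening_file, where the log line carries no range and the split
# dict contains only a path.
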
def test_pre_yarn_error(self):
    lines = [
        '2015-12-30 19:21:39,980 WARN'
        ' org.apache.hadoop.mapred.Child (main): Error running child\n',
        'java.lang.RuntimeException: PipeMapRed.waitOutputThreads():'
        ' subprocess failed with code 1\n',
        ' at org.apache.hadoop.streaming.PipeMapRed'
        '.waitOutputThreads(PipeMapRed.java:372)\n',
    ]

    self.assertEqual(
        _parse_task_syslog(lines),
        dict(hadoop_error=dict(
            message=(
                'Error running child\n'
                'java.lang.RuntimeException:'
                ' PipeMapRed.waitOutputThreads():'
                ' subprocess failed with code 1\n'
                ' at org.apache.hadoop.streaming.PipeMapRed'
                '.waitOutputThreads(PipeMapRed.java:372)'),
            num_lines=3,
            start_line=0,
        )))

def test_spark_application_failed(self):
    lines = [
        '16/11/16 22:26:22 ERROR ApplicationMaster: User application'
        ' exited with status 1\n',
        '16/11/16 22:26:22 INFO ApplicationMaster: Final app status:'
        ' FAILED, exitCode: 1, (reason: User application exited with'
        ' status 1)\n',
        '16/11/16 22:26:31 ERROR ApplicationMaster: SparkContext did not'
        ' initialize after waiting for 100000 ms. Please check earlier'
        ' log output for errors. Failing the application.\n',
    ]

    self.assertEqual(
        _parse_task_syslog(lines),
        dict(
            check_stdout=True,
            hadoop_error=dict(
                message='User application exited with status 1',
                num_lines=1,
                start_line=0,
            ),
        ))

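# Note on test_spark_application_failed: when a Spark application dies,
# the syslog records only the exit status, not the Python traceback, so
# the parser appears to set check_stdout=True to tell the caller to look
# for the real error in the task's stdout log instead.
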
def test_empty(self):
    self.assertEqual(_parse_task_syslog([]), {})
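
# Taken together, these tests suggest the shape of _parse_task_syslog's
# return value (inferred from the expectations above, not from any
# documented contract): an empty dict when nothing matches, otherwise
# some combination of 'hadoop_error', 'split', and 'check_stdout' keys.
# Below is a minimal sketch of how a caller might summarize that dict;
# summarize_syslog() is a hypothetical helper for illustration, not part
# of the module under test.

def summarize_syslog(parsed):
    """Reduce a parsed syslog dict to a one-line summary string."""
    if parsed.get('check_stdout'):
        # syslog only has the exit status; the traceback is elsewhere
        return 'task failed; check stdout for the actual traceback'
    if 'hadoop_error' in parsed:
        # keep just the first line of the (often multi-line) Java error
        return parsed['hadoop_error']['message'].split('\n')[0]
    if 'split' in parsed:
        return 'no error; was processing %s' % parsed['split']['path']
    return 'nothing of interest in syslog'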