Example #1
    def test_yarn_error(self):
        lines = [
            '2015-12-21 14:06:18,538 WARN [main]'
            ' org.apache.hadoop.mapred.YarnChild: Exception running child'
            ' : java.lang.RuntimeException: PipeMapRed.waitOutputThreads():'
            ' subprocess failed with code 1\n',
            '        at org.apache.hadoop.streaming.PipeMapRed'
            '.waitOutputThreads(PipeMapRed.java:322)\n',
            '        at org.apache.hadoop.streaming.PipeMapRed'
            '.mapRedFinished(PipeMapRed.java:535)\n',
        ]

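        # the log4j header (timestamp, level, logger) is stripped from
        # message, the stack-trace continuation lines fold into the same
        # record, and the trailing newline is trimmed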
        self.assertEqual(
            _parse_task_syslog(lines),
            dict(
                hadoop_error=dict(
                    message=(
                        'Exception running child : java.lang.RuntimeException:'
                        ' PipeMapRed.waitOutputThreads():'
                        ' subprocess failed with code 1\n'
                        '        at org.apache.hadoop.streaming.PipeMapRed'
                        '.waitOutputThreads(PipeMapRed.java:322)\n'
                        '        at org.apache.hadoop.streaming.PipeMapRed'
                        '.mapRedFinished(PipeMapRed.java:535)'),
                    num_lines=3,
                    start_line=0,
                )
            ))
Example #2
    def test_spark_executor_exception(self):
        lines = [
            '16/11/16 22:05:00 ERROR Executor: Exception in task 0.2 in stage'
            ' 0.0 (TID 4)'
            ' org.apache.spark.api.python.PythonException: Traceback (most'
            ' recent call last):\n',
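            # NB: the next entry spans two log lines -- there is no comma
            # after the '... process()\n' literal, so it and
            # 'Exception: KABOOM\n' form one list element; num_lines=4 below
            # counts list elements, not physical lines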
            '  File "/mnt/yarn/usercache/hadoop/appcache/application'
            '_1479325434015_0003/container_1479325434015_0003_02_000002/'
            'pyspark.zip/pyspark/worker.py", line 111, in main process()\n'
            'Exception: KABOOM\n',
            '\n',
            '        at org.apache.spark.api.python.PythonRunner$$anon$1.read'
            '(PythonRDD.scala:166)\n',
        ]

        self.assertEqual(
            _parse_task_syslog(lines),
            dict(hadoop_error=dict(
                message=('Exception in task 0.2 in stage'
                         ' 0.0 (TID 4)'
                         ' org.apache.spark.api.python.PythonException:'
                         ' Traceback (most recent call last):\n'
                         '  File "/mnt/yarn/usercache/hadoop/appcache/'
                         'application_1479325434015_0003/container'
                         '_1479325434015_0003_02_000002/pyspark.zip/pyspark/'
                         'worker.py", line 111, in main process()\n'
                         'Exception: KABOOM\n'
                         '\n'
                         '        at org.apache.spark.api.python.PythonRunner'
                         '$$anon$1.read(PythonRDD.scala:166)'),
                num_lines=4,
                start_line=0,
            )))
Example #3
    def test_opening_file(self):
        lines = [
            "2010-07-27 17:54:54,344 INFO"
            " org.apache.hadoop.fs.s3native.NativeS3FileSystem (main):"
            " Opening 's3://yourbucket/logs/2010/07/23/log2-00077.gz'"
            " for reading\n"
        ]

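        # an 'Opening ... for reading' line yields only the split path;
        # no start_line/num_lines, unlike the 'Processing split:' case below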
        self.assertEqual(
            _parse_task_syslog(lines),
            dict(split=dict(
                path="s3://yourbucket/logs/2010/07/23/log2-00077.gz")))
Example #4
    def test_split(self):
        lines = [
            '2015-12-21 14:06:17,707 INFO [main]'
            ' org.apache.hadoop.mapred.MapTask: Processing split:'
            ' hdfs://e4270474c8ee:9000/user/root/tmp/mrjob'
            '/mr_boom.root.20151221.190511.059097/files/bootstrap.sh:0+335\n',
        ]

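        # the trailing ':0+335' is split off the path and surfaced as
        # start_line/num_lines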
        self.assertEqual(
            _parse_task_syslog(lines),
            dict(
                split=dict(path=('hdfs://e4270474c8ee:9000/user/root/tmp/mrjob'
                                 '/mr_boom.root.20151221.190511.059097/files'
                                 '/bootstrap.sh'),
                           start_line=0,
                           num_lines=335)))
Example #5
    def test_pre_yarn_error(self):
        lines = [
            '2015-12-30 19:21:39,980 WARN'
            ' org.apache.hadoop.mapred.Child (main): Error running child\n',
            'java.lang.RuntimeException: PipeMapRed.waitOutputThreads():'
            ' subprocess failed with code 1\n',
            '        at org.apache.hadoop.streaming.PipeMapRed'
            '.waitOutputThreads(PipeMapRed.java:372)\n',
        ]

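        # pre-YARN syslogs put the exception and stack trace on the lines
        # after the log4j header line; all three still parse as one record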
        self.assertEqual(
            _parse_task_syslog(lines),
            dict(hadoop_error=dict(
                message=('Error running child\n'
                         'java.lang.RuntimeException:'
                         ' PipeMapRed.waitOutputThreads():'
                         ' subprocess failed with code 1\n'
                         '        at org.apache.hadoop.streaming.PipeMapRed'
                         '.waitOutputThreads(PipeMapRed.java:372)'),
                num_lines=3,
                start_line=0,
            )))
Example #6
    def test_spark_application_failed(self):
        lines = [
            '16/11/16 22:26:22 ERROR ApplicationMaster: User application'
            ' exited with status 1\n',
            '16/11/16 22:26:22 INFO ApplicationMaster: Final app status:'
            ' FAILED, exitCode: 1, (reason: User application exited with'
            ' status 1)\n',
            '16/11/16 22:26:31 ERROR ApplicationMaster: SparkContext did not'
            ' initialize after waiting for 100000 ms. Please check earlier'
            ' log output for errors. Failing the application.\n',
        ]

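        # only the first ERROR line becomes the error record (num_lines=1);
        # check_stdout appears to signal that the underlying error text
        # lives in the task's stdout log rather than this syslog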
        self.assertEqual(
            _parse_task_syslog(lines),
            dict(
                check_stdout=True,
                hadoop_error=dict(
                    message='User application exited with status 1',
                    num_lines=1,
                    start_line=0,
                ),
            ))
Example #7
    def test_empty(self):
        self.assertEqual(_parse_task_syslog([]), {})
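Taken together, these examples pin down the contract of _parse_task_syslog: given lines from a task syslog it returns {} when nothing matches, a split dict when it sees the input file being opened or processed, and a hadoop_error dict (message, start_line, num_lines) for the first WARN/ERROR record, plus check_stdout=True when the real error went elsewhere. Below is a minimal sketch of a caller; it assumes mrjob is installed and that this private helper is importable from mrjob.logs.task, which may vary between versions:

    from mrjob.logs.task import _parse_task_syslog

    def summarize_syslog(path):
        # assumed: an open file (an iterable of lines) works the same way
        # as the plain lists passed in the tests above
        with open(path) as f:
            info = _parse_task_syslog(f)

        if 'split' in info:
            print('input split: %s' % info['split']['path'])
        if 'hadoop_error' in info:
            err = info['hadoop_error']
            # start_line/num_lines locate the record inside the syslog
            print('error at line %d (%d lines):'
                  % (err['start_line'], err['num_lines']))
            print(err['message'])
        if info.get('check_stdout'):
            print('(check the task stdout log for the underlying error)')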