示例#1
0
    def test_multiline_message(self):
        # One timestamped record followed by two indented continuation
        # lines; every input line ends in \r\n.
        raw_log = StringIO(
            '2015-08-22 00:47:35,323 INFO org.apache.hadoop.mapreduce.Job'
            ' (main): Counters: 54\r\n'
            '        File System Counters\r\n'
            '                FILE: Number of bytes read=83\r\n'
        )

        # expected message has the \r's stripped and no trailing \n
        expected_message = (
            'Counters: 54\n'
            '        File System Counters\n'
            '                FILE: Number of bytes read=83'
        )
        expected = [
            {
                'timestamp': '2015-08-22 00:47:35,323',
                'level': 'INFO',
                'logger': 'org.apache.hadoop.mapreduce.Job',
                'thread': 'main',
                'message': expected_message,
            },
        ]

        parsed = list(_parse_hadoop_log_lines(raw_log))
        self.assertEqual(parsed, expected)
示例#2
0
    def test_non_log_lines(self):
        # Two lines without a timestamp come first, then one real record,
        # then a final line that also lacks a timestamp.
        raw_log = StringIO(
            "foo\n"
            "bar\n"
            "15/12/11 13:26:08 ERROR streaming.StreamJob:"
            " Error Launching job :"
            " Output directory already exists\n"
            "Streaming Command Failed!"
        )

        # leading non-log lines are ignored, so a single record is expected
        expected = [
            {
                "timestamp": "15/12/11 13:26:08",
                "level": "ERROR",
                "logger": "streaming.StreamJob",
                "thread": None,
                # there is no way to tell that the "Streaming Command
                # Failed!" line wasn't part of a multi-line message
                "message": (
                    "Error Launching job :"
                    " Output directory already exists\n"
                    "Streaming Command Failed!"
                ),
            },
        ]

        with no_handlers_for_logger("mrjob.logs.parse"):
            captured = StringIO()
            log_to_stream("mrjob.logs.parse", captured)

            parsed = list(_parse_hadoop_log_lines(raw_log))
            self.assertEqual(parsed, expected)

            # there should be one warning per leading non-log line
            logged = captured.getvalue().splitlines()
            self.assertEqual(len(logged), 2)
示例#3
0
 def test_log_lines(self):
     # two complete single-line records: one INFO, one ERROR
     raw_log = StringIO(
         '15/12/11 13:26:07 INFO client.RMProxy:'
         ' Connecting to ResourceManager at /0.0.0.0:8032\n'
         '15/12/11 13:26:08 ERROR streaming.StreamJob:'
         ' Error Launching job :'
         ' Output directory already exists\n'
     )

     info_record = {
         'timestamp': '15/12/11 13:26:07',
         'level': 'INFO',
         'logger': 'client.RMProxy',
         'thread': None,
         'message': 'Connecting to ResourceManager at /0.0.0.0:8032',
     }
     error_record = {
         'timestamp': '15/12/11 13:26:08',
         'level': 'ERROR',
         'logger': 'streaming.StreamJob',
         'thread': None,
         'message': (
             'Error Launching job :'
             ' Output directory already exists'
         ),
     }

     parsed = list(_parse_hadoop_log_lines(raw_log))
     self.assertEqual(parsed, [info_record, error_record])
示例#4
0
    def test_non_log_lines(self):
        # Input: two untimestamped lines, one ERROR record, and a trailing
        # untimestamped line with no final newline.
        log_input = StringIO(
            'foo\n'
            'bar\n'
            '15/12/11 13:26:08 ERROR streaming.StreamJob:'
            ' Error Launching job :'
            ' Output directory already exists\n'
            'Streaming Command Failed!'
        )

        # The parser has no way to know that 'Streaming Command Failed!'
        # wasn't part of a multi-line message, so it is expected to be
        # appended to the record's message.
        error_message = (
            'Error Launching job :'
            ' Output directory already exists\n'
            'Streaming Command Failed!'
        )
        # the leading non-log lines are ignored
        want = [
            {
                'timestamp': '15/12/11 13:26:08',
                'level': 'ERROR',
                'logger': 'streaming.StreamJob',
                'thread': None,
                'message': error_message,
            },
        ]

        with no_handlers_for_logger('mrjob.logs.parse'):
            log_sink = StringIO()
            log_to_stream('mrjob.logs.parse', log_sink)

            got = list(_parse_hadoop_log_lines(log_input))
            self.assertEqual(got, want)

            # each leading non-log line should have produced one warning
            warning_lines = log_sink.getvalue().splitlines()
            self.assertEqual(len(warning_lines), 2)
示例#5
0
    def test_non_log_lines(self):
        # 'foo' and 'bar' are not log lines; only the third line carries a
        # timestamp, level and logger.
        stream = StringIO(
            "foo\n"
            "bar\n"
            "15/12/11 13:26:08 ERROR streaming.StreamJob:"
            " Error Launching job :"
            " Output directory already exists\n"
            "Streaming Command Failed!"
        )

        # ignore leading non-log lines: just one record should come back
        only_record = {
            "timestamp": "15/12/11 13:26:08",
            "level": "ERROR",
            "logger": "streaming.StreamJob",
            "thread": None,
            # no way to know that Streaming Command Failed! wasn't part
            # of a multi-line message
            "message": (
                "Error Launching job :"
                " Output directory already exists\n"
                "Streaming Command Failed!"
            ),
        }

        with no_handlers_for_logger("mrjob.logs.parse"):
            log_buffer = StringIO()
            log_to_stream("mrjob.logs.parse", log_buffer)

            records = list(_parse_hadoop_log_lines(stream))
            self.assertEqual(records, [only_record])

            # should be one warning for each leading non-log line
            emitted = log_buffer.getvalue().splitlines()
            self.assertEqual(len(emitted), 2)
示例#6
0
 def test_trailing_carriage_return(self):
     # a single record whose line ends in \r\n rather than a bare \n
     raw_log = StringIO(
         "15/12/11 13:26:07 INFO client.RMProxy:"
         " Connecting to ResourceManager at /0.0.0.0:8032\r\n"
     )

     # the expected message carries neither the \r nor the \n
     expected = [
         {
             "timestamp": "15/12/11 13:26:07",
             "level": "INFO",
             "logger": "client.RMProxy",
             "thread": None,
             "message": "Connecting to ResourceManager at /0.0.0.0:8032",
         },
     ]

     parsed = list(_parse_hadoop_log_lines(raw_log))
     self.assertEqual(parsed, expected)
示例#7
0
    def test_thread(self):
        # record that has a "(main)" thread name between logger and message
        raw_log = StringIO(
            "2015-08-22 00:46:18,411 INFO amazon.emr.metrics.MetricsSaver"
            " (main): Thread 1 created MetricsLockFreeSaver 1\n"
        )

        # the thread name is expected without its parentheses
        expected = [
            {
                "timestamp": "2015-08-22 00:46:18,411",
                "level": "INFO",
                "logger": "amazon.emr.metrics.MetricsSaver",
                "thread": "main",
                "message": "Thread 1 created MetricsLockFreeSaver 1",
            },
        ]

        parsed = list(_parse_hadoop_log_lines(raw_log))
        self.assertEqual(parsed, expected)
示例#8
0
 def test_trailing_carriage_return(self):
     # input line is terminated by \r\n
     log_input = StringIO(
         "15/12/11 13:26:07 INFO client.RMProxy:"
         " Connecting to ResourceManager at /0.0.0.0:8032\r\n"
     )

     # the line terminator must not show up in the parsed message
     want_record = {
         "timestamp": "15/12/11 13:26:07",
         "level": "INFO",
         "logger": "client.RMProxy",
         "thread": None,
         "message": "Connecting to ResourceManager at /0.0.0.0:8032",
     }

     got = list(_parse_hadoop_log_lines(log_input))
     self.assertEqual(got, [want_record])
示例#9
0
 def test_trailing_carriage_return(self):
     # one INFO record, ending in a carriage return plus newline
     stream = StringIO(
         '15/12/11 13:26:07 INFO client.RMProxy:'
         ' Connecting to ResourceManager at /0.0.0.0:8032\r\n'
     )

     # neither \r nor \n is expected at the end of the message
     expected_message = 'Connecting to ResourceManager at /0.0.0.0:8032'
     expected_records = [
         {
             'timestamp': '15/12/11 13:26:07',
             'level': 'INFO',
             'logger': 'client.RMProxy',
             'thread': None,
             'message': expected_message,
         },
     ]

     records = list(_parse_hadoop_log_lines(stream))
     self.assertEqual(records, expected_records)
示例#10
0
    def test_thread(self):
        # log line that names its thread, "(main)", before the colon
        log_input = StringIO(
            "2015-08-22 00:46:18,411 INFO amazon.emr.metrics.MetricsSaver"
            " (main): Thread 1 created MetricsLockFreeSaver 1\n"
        )

        want_record = {
            "timestamp": "2015-08-22 00:46:18,411",
            "level": "INFO",
            "logger": "amazon.emr.metrics.MetricsSaver",
            # parentheses around the thread name are not part of the value
            "thread": "main",
            "message": "Thread 1 created MetricsLockFreeSaver 1",
        }

        got = list(_parse_hadoop_log_lines(log_input))
        self.assertEqual(got, [want_record])
示例#11
0
    def test_thread(self):
        # a record in the format that includes a thread name in parentheses
        stream = StringIO(
            '2015-08-22 00:46:18,411 INFO amazon.emr.metrics.MetricsSaver'
            ' (main): Thread 1 created MetricsLockFreeSaver 1\n'
        )

        # expect the thread field to be filled in with the bare name
        expected_records = [
            {
                'timestamp': '2015-08-22 00:46:18,411',
                'level': 'INFO',
                'logger': 'amazon.emr.metrics.MetricsSaver',
                'thread': 'main',
                'message': 'Thread 1 created MetricsLockFreeSaver 1',
            },
        ]

        records = list(_parse_hadoop_log_lines(stream))
        self.assertEqual(records, expected_records)
示例#12
0
 def test_log_lines(self):
     # two newline-terminated records with no thread name
     raw_log = StringIO(
         "15/12/11 13:26:07 INFO client.RMProxy:"
         " Connecting to ResourceManager at /0.0.0.0:8032\n"
         "15/12/11 13:26:08 ERROR streaming.StreamJob:"
         " Error Launching job :"
         " Output directory already exists\n"
     )

     # records are expected in input order, messages without the final \n
     expected = [
         {
             "timestamp": "15/12/11 13:26:07",
             "level": "INFO",
             "logger": "client.RMProxy",
             "thread": None,
             "message": "Connecting to ResourceManager at /0.0.0.0:8032",
         },
         {
             "timestamp": "15/12/11 13:26:08",
             "level": "ERROR",
             "logger": "streaming.StreamJob",
             "thread": None,
             "message": (
                 "Error Launching job :"
                 " Output directory already exists"
             ),
         },
     ]

     parsed = list(_parse_hadoop_log_lines(raw_log))
     self.assertEqual(parsed, expected)
示例#13
0
    def test_multiline_message(self):
        # a record whose message continues over two more lines (\r\n ends)
        log_input = StringIO(
            "2015-08-22 00:47:35,323 INFO org.apache.hadoop.mapreduce.Job"
            " (main): Counters: 54\r\n"
            "        File System Counters\r\n"
            "                FILE: Number of bytes read=83\r\n"
        )

        want_record = {
            "timestamp": "2015-08-22 00:47:35,323",
            "level": "INFO",
            "logger": "org.apache.hadoop.mapreduce.Job",
            "thread": "main",
            # strip \r's, no trailing \n
            "message": (
                "Counters: 54\n"
                "        File System Counters\n"
                "                FILE: Number of bytes read=83"
            ),
        }

        got = list(_parse_hadoop_log_lines(log_input))
        self.assertEqual(got, [want_record])
示例#14
0
    def test_multiline_message(self):
        # Counters output: the first line has the log prefix, the next two
        # are indented continuation lines.
        stream = StringIO(
            "2015-08-22 00:47:35,323 INFO org.apache.hadoop.mapreduce.Job"
            " (main): Counters: 54\r\n"
            "        File System Counters\r\n"
            "                FILE: Number of bytes read=83\r\n"
        )

        # continuation lines are expected to be joined with plain \n,
        # keeping their leading whitespace but dropping the last newline
        joined_message = (
            "Counters: 54\n"
            "        File System Counters\n"
            "                FILE: Number of bytes read=83"
        )
        expected_records = [
            {
                "timestamp": "2015-08-22 00:47:35,323",
                "level": "INFO",
                "logger": "org.apache.hadoop.mapreduce.Job",
                "thread": "main",
                "message": joined_message,
            },
        ]

        records = list(_parse_hadoop_log_lines(stream))
        self.assertEqual(records, expected_records)
示例#15
0
 def test_log_lines(self):
     # one INFO line followed by one ERROR line
     log_input = StringIO(
         "15/12/11 13:26:07 INFO client.RMProxy:"
         " Connecting to ResourceManager at /0.0.0.0:8032\n"
         "15/12/11 13:26:08 ERROR streaming.StreamJob:"
         " Error Launching job :"
         " Output directory already exists\n"
     )

     first = {
         "timestamp": "15/12/11 13:26:07",
         "level": "INFO",
         "logger": "client.RMProxy",
         "thread": None,
         "message": "Connecting to ResourceManager at /0.0.0.0:8032",
     }
     second = {
         "timestamp": "15/12/11 13:26:08",
         "level": "ERROR",
         "logger": "streaming.StreamJob",
         "thread": None,
         "message": (
             "Error Launching job :"
             " Output directory already exists"
         ),
     }

     # each input line should become its own record, in order
     got = list(_parse_hadoop_log_lines(log_input))
     self.assertEqual(got, [first, second])
示例#16
0
 def test_empty(self):
     # no input lines should yield no parsed records
     parsed = list(_parse_hadoop_log_lines([]))
     self.assertEqual(parsed, [])
示例#17
0
 def test_empty(self):
     # an empty list of lines is expected to parse to an empty result
     no_lines = []
     records = list(_parse_hadoop_log_lines(no_lines))
     self.assertEqual(records, [])