Exemplo n.º 1
0
    def _parse_tables(report_str: str) -> Dict[str, str]:
        """Extract the titled tables found in a fitter report.

        Args:
            report_str: full text of the report.

        Returns:
            A dict mapping each table title (with surrounding whitespace
            stripped) to the text of the table body. When the same title is
            matched more than once, the last match is the one kept.
        """

        # A rule is a full line that starts with "+" and otherwise contains
        # only "+" and "-" characters.
        rule = pp.lineStart() + pp.Word("+", "+-") + pp.lineEnd()

        # The title sits alone on a line, enclosed between two semicolons.
        title_text = pp.SkipTo(";")("title").setParseAction(
            pp.tokenMap(str.strip)
        )
        title_line = pp.lineStart() + ";" + title_text + ";" + pp.lineEnd()

        # One chunk is everything up to and including the next rule; a blank
        # line (two consecutive line ends) aborts the chunk. A table with
        # column headings has a rule after the headings and another at the
        # end, so the body holds two chunks; a table without headings only
        # has the closing rule, so the body holds one.
        chunk = pp.SkipTo(rule, failOn=pp.lineEnd() * 2, include=True)
        body = pp.Combine(rule + chunk * (1, 2))("body")

        grammar = rule + title_line + body

        # Only skip spaces and tabs so that line endings stay significant.
        grammar.setWhitespaceChars(" \t")

        tables: Dict[str, str] = {}
        for match in grammar.searchString(report_str):
            tables[match.title] = match.body

        return tables
Exemplo n.º 2
0
    def _parse_map_tables(report_str: str) -> Dict[str, str]:
        """
        Extract the tables from an ISE map report.

        Args:
            report_str: full text of the map report.

        Returns:
            A dict mapping each section title (with surrounding whitespace
            stripped) to the text of the table found in that section. When
            the same title is matched more than once, the last match is kept.
        """

        # Section headings look like:
        #
        # Section 12 - Control Set Information
        # ------------------------------------
        #
        # Only the text after the dash is captured as the title.
        heading_text = pp.SkipTo(pp.lineEnd())("title").setParseAction(
            pp.tokenMap(str.strip)
        )
        heading = (
            pp.lineStart()
            + "Section"
            + ppc.integer
            + "-"
            + heading_text
            + pp.lineEnd()
        )

        # The dashed underline, plus one or more line ends, is matched and
        # then dropped from the results.
        underline = pp.Suppress(
            pp.lineStart() + pp.Word("-") + pp.lineEnd() * (1,)
        )

        # Table rules look like
        # +-------------------------------+
        rule = pp.lineStart() + pp.Word("+", "+-") + pp.lineEnd()

        # The usual table layout is
        # +-----------------------+
        # | Col 1 | Col 2 | Col 3 |
        # +-----------------------+
        # | D1    | D2    | D3    |
        # ...
        # +-----------------------+
        #
        # but "Control Set Information" appears to also use rules to separate
        # clocks inside the data rows. So instead of counting rules, the body
        # runs until a rule that is followed by a blank line.
        closing_rule = rule + pp.LineEnd()
        body = pp.Combine(rule + pp.SkipTo(closing_rule, include=True))("body")

        grammar = heading + underline + body

        # Only skip spaces and tabs so that line endings stay significant.
        grammar.setWhitespaceChars(" \t")

        tables: Dict[str, str] = {}
        for match in grammar.searchString(report_str):
            tables[match.title] = match.body

        return tables
Exemplo n.º 3
0
class BashHistoryParser(text_parser.PyparsingMultiLineTextParser):
    """Parses events from Bash history files.

    Each entry is matched as a "#" followed by a 9- or 10-digit timestamp and
    then the command text, which runs until the next timestamp line or the
    end of the text.
    """

    NAME = u'bash'

    DESCRIPTION = u'Parser for Bash history files'

    _ENCODING = u'utf-8'

    # "#" marker (suppressed) followed by 9 or 10 digits; the digits are
    # passed through text_parser.PyParseIntCast and named "timestamp".
    _TIMESTAMP = pyparsing.Suppress(u'#') + pyparsing.Word(
        pyparsing.nums, min=9, max=10).setParseAction(
            text_parser.PyParseIntCast).setResultsName(u'timestamp')

    # The command is the shortest text that ends at the end of the input or
    # right before a newline that starts the next "#<timestamp>" line. The
    # lookahead accepts 9 or 10 digits so that it agrees with _TIMESTAMP;
    # requiring exactly 10 digits would let a command swallow every entry
    # that follows a 9-digit timestamp line.
    _COMMAND = pyparsing.Regex(r'.*?(?=($|\n#\d{9,10}))',
                               re.DOTALL).setResultsName(u'command')

    _LINE_GRAMMAR = _TIMESTAMP + _COMMAND + pyparsing.lineEnd()

    # Used by VerifyStructure: a line that does not start with "#" (after at
    # most one whitespace character), followed by a timestamp that is not
    # itself followed by a Python-style ("#...") comment.
    _VERIFICATION_GRAMMAR = (pyparsing.Regex(r'^\s?[^#].*?$', re.MULTILINE) +
                             _TIMESTAMP +
                             pyparsing.NotAny(pyparsing.pythonStyleComment))

    LINE_STRUCTURES = [(u'log_entry', _LINE_GRAMMAR)]

    def ParseRecord(self, mediator, key, structure):
        """Parses a record and produces a Bash history event.

        Args:
          mediator (ParserMediator): mediates the interactions between
              parsers and other components, such as storage and abort signals.
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): elements parsed from the file.

        Raises:
          UnableToParseFile: if an unsupported key is provided.
        """
        if key != u'log_entry':
            raise errors.UnableToParseFile(
                u'Unsupported key: {0:s}'.format(key))
        event = BashHistoryEvent(structure.timestamp, structure.command)
        mediator.ProduceEvent(event)

    def VerifyStructure(self, unused_mediator, line):
        """Verifies that this is a bash history file.

        Args:
          unused_mediator (ParserMediator): mediates the interactions between
              parsers and other components, such as storage and abort signals.
              Not used by this method.
          line (str): single line from the text file.

        Returns:
          bool: True if this is the correct parser, False otherwise.
        """
        # A single match of the verification grammar anywhere in the text is
        # enough, so the scan stops after the first one.
        match_generator = self._VERIFICATION_GRAMMAR.scanString(line,
                                                                maxMatches=1)
        return bool(list(match_generator))
Exemplo n.º 4
0
    def __init__(self):
        """Build the pyparsing grammar for LAD programs.

        The grammar is stored piece by piece on the instance; ``self.Program``
        is the top-level expression (one or more ``self.NwProgram`` groups).
        """

        def terminated(expr):
            """Return ``expr`` followed by a suppressed end of line."""
            return expr + pp.Suppress(pp.lineEnd())

        def operand(prefix):
            """Return a combined token: ``prefix`` plus one or two digits."""
            return pp.Combine(
                pp.CaselessLiteral(prefix) + pp.Word(pp.nums, max=2))

        def keyword_pair(negated, plain):
            """Return a match for either keyword, trying ``negated`` first."""
            return pp.CaselessKeyword(negated) | pp.CaselessKeyword(plain)

        # Repetition range used for chained OR / AND-OR commands.
        up_to_seven = (0, 7)

        # Network header: "NW:" followed by a single digit converted to int.
        self.NwNumber = pp.Word(pp.nums, max=1).setParseAction(
            pp.tokenMap(int)).setBreak(False)
        self.Nw = terminated(pp.CaselessLiteral('NW:') + self.NwNumber)

        # Operands: I, O or M followed by up to two digits.
        self.Ope_I = operand('I')
        self.Ope_O = operand('O')
        self.Ope_M = operand('M')
        self.Ope = self.Ope_I | self.Ope_O | self.Ope_M

        # Commands that take an operand, one per line.
        self.Command_LD = terminated(keyword_pair('LDN', 'LD') + self.Ope)
        self.Command_AND = terminated(keyword_pair('ANDN', 'AND') + self.Ope)
        self.Command_OR = terminated(keyword_pair('ORN', 'OR') + self.Ope)
        self.Command_OUT = terminated(pp.CaselessKeyword('OUT') + self.Ope)

        # Block delimiter commands, one per line and without operand.
        self.Command_BSAND = terminated(pp.CaselessKeyword('BSAND'))
        self.Command_BFAND = terminated(pp.CaselessKeyword('BFAND'))
        self.Command_BSOR = terminated(pp.CaselessKeyword('BSOR'))
        self.Command_BFOR = terminated(pp.CaselessKeyword('BFOR'))

        # Simple sequences: a leading LD or AND followed by up to seven ORs,
        # and an LD sequence followed by up to seven AND sequences.
        self.Command_LDOR = self.Command_LD + self.Command_OR * up_to_seven
        self.Command_ANDOR = self.Command_AND + self.Command_OR * up_to_seven
        self.Command_LDAND = (
            self.Command_LDOR + self.Command_ANDOR * up_to_seven)

        # Blocks are recursive: a block is either a delimited pair of blocks
        # (BSOR..BFOR or BSAND..BFAND) or a simple sequence, optionally
        # followed by further AND sequences.
        self.Complex = pp.Forward()
        self.Block = pp.Group(
            (self.Complex | self.Command_LDAND)
            + pp.Optional(self.Command_ANDOR * up_to_seven))
        self.ComplexOR = (
            self.Command_BSOR + self.Block + self.Block + self.Command_BFOR)
        self.ComplexAND = (
            self.Command_BSAND + self.Block + self.Block + self.Command_BFAND)
        self.Complex <<= self.ComplexOR | self.ComplexAND

        # One network: header, one block, and the closing OUT command.
        self.NwProgram = pp.Group(self.Nw + self.Block + self.Command_OUT)

        self.Program = pp.OneOrMore(self.NwProgram)
Exemplo n.º 5
0
class TestPyparsingSingleLineTextParser(
    text_parser.PyparsingSingleLineTextParser):
  """Minimal single line pyparsing-based text parser used by tests.

  It defines one line structure that matches any line, produces no events
  and accepts every file.
  """

  _ENCODING = 'utf-8'

  # Any run of characters up to the end of the line.
  _LINE = pyparsing.Regex('.*') + pyparsing.lineEnd()

  LINE_STRUCTURES = [('line', _LINE)]

  def ParseRecord(self, parser_mediator, key, structure):
    """Accepts a parsed log record and deliberately does nothing with it.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): tokens from a parsed log line.
    """
    return None

  def VerifyStructure(self, parser_mediator, line):
    """Reports every file as parsable by this parser.

    No inspection of the line is done; the result is unconditionally True.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      line (str): single line from the text file.

    Returns:
      bool: always True.
    """
    return True
Exemplo n.º 6
0
    def _parse_utilization_tables(util_str: str) -> Dict[str, str]:
        """
        Collect the section titles and tables of a Vivado utilization report.

        Args:
            util_str: full text of the utilization report.

        Returns:
            A dict mapping each section title to the text of the table that
            follows it. When the same title is matched more than once, the
            last match is the one kept.
        """

        # Section headings look like the following; the leading number and
        # the dashed underline are matched but dropped from the results.
        #
        # 1.1 Summary of Registers by Type
        # --------------------------------
        section_number = pp.Suppress(pp.lineStart() + pp.Word(pp.nums + "."))
        heading_text = pp.SkipTo(pp.lineEnd())("title")
        heading = section_number + heading_text + pp.lineEnd().suppress()

        underline = pp.Suppress(pp.lineStart() + pp.Word("-") + pp.lineEnd())

        # The heading and underline are followed by one more (empty) line.
        header = heading + underline + pp.lineEnd().suppress()

        # Tables use rules like the following to mark the column headings and
        # the end of the table:
        #
        # +------+------+-------+
        rule = pp.lineStart() + pp.Word("+", "-+") + pp.lineEnd()

        # One chunk is everything up to and including the next rule; a blank
        # line (two consecutive line ends) aborts the chunk. A table is either
        # just a header (one chunk) or a header plus data rows (two chunks).
        chunk = pp.SkipTo(rule, failOn=pp.lineEnd() * 2, include=True)
        table = pp.Combine(rule + chunk * (1, 2))

        grammar = header + table("table")

        # Only skip spaces and tabs so that line endings stay significant.
        grammar.setWhitespaceChars(" \t")

        tables: Dict[str, str] = {}
        for match in grammar.searchString(util_str):
            title = match["title"]
            tables[title] = match["table"]

        return tables
Exemplo n.º 7
0
    def build_parser(self):
        """Build the pyparsing grammar for a list of gradient points.

        A gradient point is a number, an optional ":" and a color, with one
        point per line. Colors may be written as "#rgb", "#rrggbb",
        "rgb(...)", "rgba(...)" or "jmh(...)". Parsing has side effects on
        the instance: each matched color calls ``self._set_colorspace`` and
        each matched point is appended to ``self.grad_points`` as ``(x, y)``.

        Returns:
            The pyparsing expression matching one or more gradient points.
        """

        def expand_short_hex(tokens):
            # "#abc" style: every digit is doubled before conversion.
            return tuple(int(ch + ch, 16) for ch in tokens[0])

        def scale_percent(tokens):
            # Percentages are rescaled from 0-100 to 0-255; plain integers
            # are passed through unchanged.
            if tokens.percent:
                return tokens[0] * 255 / 100
            return tokens[0]

        def first_three(tokens):
            # Only the first three components are kept.
            return (tokens[0], tokens[1], tokens[2])

        def record_point(tokens):
            self.grad_points.append((tokens.x, tokens.y))

        position = ppc.fraction | ppc.number

        # Hexadecimal colors; the six digit form is tried first.
        hex_short = pp.Suppress('#') + pp.Word(pp.nums + pp.hexnums, exact=3)
        hex_short.addParseAction(expand_short_hex)
        hex_long = pp.Suppress('#') + pp.Word(pp.nums + pp.hexnums, exact=6)
        hex_long.addParseAction(self.long_hex_color)
        hex_color = hex_long | hex_short
        hex_color.addParseAction(lambda: self._set_colorspace('rgb'))

        # One color component: an integer, optionally followed by "%".
        component = (ppc.integer + pp.Literal('%')('percent')) | ppc.integer
        component.addParseAction(scale_percent)

        def functional_color(opener, colorspace):
            # "<opener>c1, c2, c3, ...)" reduced to its first three
            # components, selecting ``colorspace`` when matched.
            expr = opener + pp.delimitedList(component) + pp.Suppress(')')
            expr.addParseAction(first_three)
            expr.addParseAction(lambda: self._set_colorspace(colorspace))
            return expr

        rgb_color = functional_color(
            pp.Suppress('rgba(') | pp.Suppress('rgb('), 'rgb')
        jmh_color = functional_color(pp.Suppress('jmh('), 'jmh')

        any_color = hex_color ^ rgb_color ^ jmh_color

        point = position('x') + pp.Optional(':') + any_color('y')
        point.addParseAction(record_point)

        return pp.OneOrMore(point + pp.lineEnd())
Exemplo n.º 8
0
class SkyDriveLogParser(text_parser.PyparsingMultiLineTextParser):
  """Parser for SkyDrive (OneDrive) log files.

  Two structures are recognized: a "header" that starts with
  "###### Logging started." and a comma separated "logline" whose detail
  field may span several lines.
  """

  NAME = 'skydrive_log'
  DESCRIPTION = 'Parser for OneDrive (or SkyDrive) log files.'

  _ENCODING = 'utf-8'

  # Building blocks shared by the SDF (SkyDrive Format) grammars.
  _COMMA = pyparsing.Literal(',').suppress()
  _HYPHEN = text_parser.PyparsingConstants.HYPHEN

  _THREE_DIGITS = text_parser.PyparsingConstants.THREE_DIGITS
  _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS

  MSEC = pyparsing.Word(pyparsing.nums, max=3).setParseAction(
      text_parser.PyParseIntCast)

  # A comma separated field whose content is matched and dropped.
  IGNORE_FIELD = pyparsing.CharsNotIn(',').suppress()

  # Header date and time, formatted as: YYYY-MM-DD-hhmmss.###
  # For example: 2013-07-25-160323.291
  _SDF_HEADER_DATE_TIME = pyparsing.Group(
      text_parser.PyparsingConstants.DATE_ELEMENTS + _HYPHEN +
      _TWO_DIGITS.setResultsName('hours') +
      _TWO_DIGITS.setResultsName('minutes') +
      _TWO_DIGITS.setResultsName('seconds') +
      pyparsing.Literal('.').suppress() +
      _THREE_DIGITS.setResultsName('milliseconds')
  ).setResultsName('header_date_time')

  # Date and time of every other line, formatted as: MM-DD-YY,hh:mm:ss.###
  # For example: 07-25-13,16:06:31.820
  _SDF_DATE_TIME = (
      _TWO_DIGITS.setResultsName('month') + _HYPHEN +
      _TWO_DIGITS.setResultsName('day') + _HYPHEN +
      _TWO_DIGITS.setResultsName('year') + _COMMA +
      text_parser.PyparsingConstants.TIME_ELEMENTS +
      pyparsing.Suppress('.') +
      _THREE_DIGITS.setResultsName('milliseconds')
  ).setResultsName('date_time')

  _SDF_HEADER_START = (
      pyparsing.Literal('######').suppress() +
      pyparsing.Literal('Logging started.').setResultsName('log_start'))

  # A multi-line entry ends at the end of the text, at the next header or at
  # the next line date and time.
  _SDF_ENTRY_END = pyparsing.StringEnd() | _SDF_HEADER_START | _SDF_DATE_TIME

  # Log line: date and time, three ignored fields, module, source code, two
  # ignored fields, log level and finally the (possibly multi-line) detail.
  _SDF_LINE = (
      _SDF_DATE_TIME + _COMMA +
      IGNORE_FIELD + _COMMA +
      IGNORE_FIELD + _COMMA +
      IGNORE_FIELD + _COMMA +
      pyparsing.CharsNotIn(',').setResultsName('module') + _COMMA +
      pyparsing.CharsNotIn(',').setResultsName('source_code') + _COMMA +
      IGNORE_FIELD + _COMMA +
      IGNORE_FIELD + _COMMA +
      pyparsing.CharsNotIn(',').setResultsName('log_level') + _COMMA +
      pyparsing.SkipTo(_SDF_ENTRY_END).setResultsName('detail') +
      pyparsing.ZeroOrMore(pyparsing.lineEnd()))

  _SDF_HEADER = (
      _SDF_HEADER_START +
      pyparsing.Literal('Version=').setResultsName('version_string') +
      pyparsing.Word(pyparsing.nums + '.').setResultsName('version_number') +
      pyparsing.Literal('StartSystemTime:').suppress() +
      _SDF_HEADER_DATE_TIME +
      pyparsing.Literal('StartLocalTime:').setResultsName(
          'local_time_string') +
      pyparsing.SkipTo(pyparsing.lineEnd()).setResultsName('details') +
      pyparsing.lineEnd())

  LINE_STRUCTURES = [
      ('logline', _SDF_LINE),
      ('header', _SDF_HEADER)
  ]

  def _ParseHeader(self, parser_mediator, structure):
    """Produces an event from a parsed header structure.

    The tokens of a header look like:

    ['Logging started.', 'Version=', '17.0.2011.0627',
    [2013, 7, 25], 16, 3, 23, 291, 'StartLocalTime', '<details>']

    An extraction warning is produced, and no event, when the header date
    and time is rejected with a ValueError.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
    header_date_time = structure.header_date_time

    try:
      date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=header_date_time)
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'invalid date time value: {0!s}'.format(header_date_time))
      return

    # TODO: refactor detail to individual event data attributes.
    detail_parts = (
        structure.log_start, structure.version_string,
        structure.version_number, structure.local_time_string,
        structure.details)

    event_data = SkyDriveLogEventData()
    event_data.detail = '{0:s} {1:s} {2:s} {3:s} {4:s}'.format(*detail_parts)

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_ADDED)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def _ParseLine(self, parser_mediator, structure):
    """Produces an event from a parsed log line structure.

    An extraction warning is produced, and no event, when the date and time
    of the line is rejected with a ValueError.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.
    """
    # TODO: Verify if date and time value is locale dependent.
    (month, day_of_month, two_digit_year, hours, minutes, seconds,
     milliseconds) = structure.date_time

    # The log stores a two digit year, which is placed in the 2000s.
    time_elements_tuple = (
        two_digit_year + 2000, month, day_of_month, hours, minutes, seconds,
        milliseconds)

    try:
      date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=time_elements_tuple)
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'invalid date time value: {0!s}'.format(structure.date_time))
      return

    # TODO: refactor detail to individual event data attributes.
    # The detail can span several lines; newlines become spaces so that the
    # output stays on one line.
    single_line_detail = structure.detail.replace('\n', ' ')

    event_data = SkyDriveLogEventData()
    event_data.detail = single_line_detail
    event_data.log_level = structure.log_level
    event_data.module = structure.module
    event_data.source_code = structure.source_code

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_ADDED)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseRecord(self, parser_mediator, key, structure):
    """Dispatches a parsed record to the handler of its structure type.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): identifier of the structure of tokens.
      structure (pyparsing.ParseResults): structure of tokens derived from
          a line of a text file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key == 'logline':
      self._ParseLine(parser_mediator, structure)

    elif key == 'header':
      self._ParseHeader(parser_mediator, structure)

    else:
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

  def VerifyStructure(self, parser_mediator, lines):
    """Checks whether the text starts with a valid SkyDrive log header.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      lines (str): one or more lines from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    try:
      header = self._SDF_HEADER.parseString(lines)
    except pyparsing.ParseException:
      logger.debug('Not a SkyDrive log file')
      return False

    header_date_time = header.header_date_time

    # The header must also carry a date and time that can be represented.
    try:
      dfdatetime_time_elements.TimeElementsInMilliseconds(
          time_elements_tuple=header_date_time)
    except ValueError:
      logger.debug(
          'Not a SkyDrive log file, invalid date and time: {0!s}'.format(
              header_date_time))
      return False

    return True
Exemplo n.º 9
0
# C++ Syntax Description

import pyparsing as pp

from cpp_lang import *
from cpp_builders import *
from pp_utils import *

def _keyword_as(word, value):
    """Match the keyword ``word`` and replace the matched token by ``value``."""
    return pp.Keyword(word).setParseAction(pp.replaceWith(value))


def _literal_as(text, value):
    """Match the literal ``text`` and replace the matched token by ``value``."""
    return pp.Literal(text).setParseAction(pp.replaceWith(value))


# Comments (C and C++ style) have to be stripped from the input.
comment = pp.cStyleComment | pp.cppStyleComment

# A preprocessor directive: "#" plus letters at the start of a line, then the
# rest of that line. Newlines are left out of the skippable whitespace.
preprocessor = (
    pp.lineStart() + pp.Word('#', pp.alphas) + pp.SkipTo(pp.lineEnd())
)
preprocessor.setWhitespaceChars(' \r\t')

identifier = pp.Word(pp.alphas + '_', pp.alphanums + '_')

# Type qualifiers.
persistency = _keyword_as('static', TypeArgs.STATIC_TYPE)
volatility = (
    _keyword_as('const', TypeArgs.CONST_TYPE)
    | _keyword_as('volatile', TypeArgs.VOLATILE_TYPE)
)

# Pointer and reference markers.
reference = (
    _literal_as('*', CppPointerTypeExpression.POINTER_VAR)
    | _literal_as('&', CppPointerTypeExpression.REFERENCE_VAR)
)

# Function qualifiers; "const" here is the member-function-on-const-object
# form, and "= 0" marks an abstract function.
const_function = _keyword_as('const', FunctionArgs.CONST_FUNCTION)
virtual_function = _keyword_as('virtual', FunctionArgs.VIRTUAL_FUNCTION)
destructor_tag = _literal_as('~', FunctionArgs.DESTRUCTOR_FUNCTION)
abstract_function = (pp.Literal('=') + pp.Literal('0')).setParseAction(
    pp.replaceWith(FunctionArgs.ABSTRACT_FUNCTION))
inline_function = _keyword_as('inline', FunctionArgs.INLINE_FUNCTION)

# Keywords that introduce a complex type definition.
complex_type = (
    _keyword_as('class', CppComplexTypeDefinition.CLASS)
    | _keyword_as('struct', CppComplexTypeDefinition.STRUCT)
    | _keyword_as('union', CppComplexTypeDefinition.UNION)
)
Exemplo n.º 10
0
class GoogleDriveSyncLogParser(text_parser.PyparsingMultiLineTextParser):
    """Parser for Google Drive Sync log files.

    A single "logline" structure is recognized: a date and time with time
    zone offset, a log level, three whitespace delimited fields (pid, thread
    and source code) and a message that may span several lines.
    """

    NAME = 'gdrive_synclog'
    DATA_FORMAT = 'Google Drive Sync log file'

    _ENCODING = 'utf-8'

    # Log messages are often many lines of Python object dumps or similar.
    # With the default buffer size, multi-line entries hit the end of the
    # buffered string prematurely, hence the larger value.
    BUFFER_SIZE = 16384

    _HYPHEN = text_parser.PyparsingConstants.HYPHEN

    _FOUR_DIGITS = text_parser.PyparsingConstants.FOUR_DIGITS
    _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS

    _GDS_DATE_TIME = pyparsing.Group(
        _FOUR_DIGITS.setResultsName('year') + _HYPHEN +
        _TWO_DIGITS.setResultsName('month') + _HYPHEN +
        _TWO_DIGITS.setResultsName('day') +
        text_parser.PyparsingConstants.TIME_MSEC_ELEMENTS +
        pyparsing.Word(pyparsing.printables).setResultsName(
            'time_zone_offset')
    ).setResultsName('date_time')

    # A multi-line entry ends at the end of the text or at the next date and
    # time.
    _GDS_ENTRY_END = pyparsing.StringEnd() | _GDS_DATE_TIME

    _GDS_LINE = (
        _GDS_DATE_TIME +
        pyparsing.Word(pyparsing.alphas).setResultsName('log_level') +
        # TODO: strip pid= out, cast to integers?
        pyparsing.Word(pyparsing.printables).setResultsName('pid') +
        # TODO: consider stripping thread identifier/cleaning up thread name?
        pyparsing.Word(pyparsing.printables).setResultsName('thread') +
        pyparsing.Word(pyparsing.printables).setResultsName('source_code') +
        pyparsing.SkipTo(_GDS_ENTRY_END).setResultsName('message') +
        pyparsing.ZeroOrMore(pyparsing.lineEnd()))

    LINE_STRUCTURES = [
        ('logline', _GDS_LINE),
    ]

    def _GetISO8601String(self, structure):
        """Builds an ISO 8601 date time string from the time elements.

        The date and time values in Google Drive Sync log files are
        formatted as: "2018-01-24 18:25:08,454 -0800".

        Args:
          structure (pyparsing.ParseResults): structure of tokens derived
              from a line of a text file, that contains the time elements.

        Returns:
          str: ISO 8601 date time string.

        Raises:
          ValueError: if the structure cannot be converted into a date time
              string.
        """
        time_zone_offset = self._GetValueFromStructure(
            structure, 'time_zone_offset')

        # The offset is a sign character followed by hours (2 digits) and
        # minutes (2 digits).
        try:
            offset_hours = int(time_zone_offset[1:3], 10)
            offset_minutes = int(time_zone_offset[3:5], 10)
        except (IndexError, TypeError, ValueError) as exception:
            raise ValueError(
                'unable to parse time zone offset with error: {0!s}.'.format(
                    exception))

        (year, month, day_of_month, hours, minutes, seconds,
         microseconds) = [
             self._GetValueFromStructure(structure, name)
             for name in (
                 'year', 'month', 'day', 'hours', 'minutes', 'seconds',
                 'microseconds')]

        iso8601_format = (
            '{0:04d}-{1:02d}-{2:02d}T{3:02d}:{4:02d}:{5:02d}.{6:03d}'
            '{7:s}{8:02d}:{9:02d}')

        try:
            iso8601 = iso8601_format.format(
                year, month, day_of_month, hours, minutes, seconds,
                microseconds, time_zone_offset[0], offset_hours,
                offset_minutes)
        except (TypeError, ValueError) as exception:
            raise ValueError(
                'unable to format date time string with error: {0!s}.'.format(
                    exception))

        return iso8601

    def _ParseRecordLogline(self, parser_mediator, structure):
        """Produces an event from a parsed logline structure.

        An extraction warning is produced, and no event, when the date and
        time of the line cannot be converted.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          structure (pyparsing.ParseResults): structure of tokens derived
              from a line of a text file.
        """
        date_time = dfdatetime_time_elements.TimeElementsInMilliseconds()

        time_elements = self._GetValueFromStructure(structure, 'date_time')
        try:
            date_time.CopyFromStringISO8601(
                self._GetISO8601String(time_elements))
        except ValueError:
            parser_mediator.ProduceExtractionWarning(
                'invalid date time value: {0!s}'.format(time_elements))
            return

        # The message can span several lines; newlines become spaces so that
        # the output stays on one line.
        message = self._GetValueFromStructure(structure, 'message')
        if message:
            message = message.replace('\n', ' ')

        event_data = GoogleDriveSyncLogEventData()
        event_data.log_level = self._GetValueFromStructure(
            structure, 'log_level')
        event_data.pid = self._GetValueFromStructure(structure, 'pid')
        event_data.thread = self._GetValueFromStructure(structure, 'thread')
        event_data.source_code = self._GetValueFromStructure(
            structure, 'source_code')
        event_data.message = message

        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_ADDED)

        parser_mediator.ProduceEventWithEventData(event, event_data)

    def ParseRecord(self, parser_mediator, key, structure):
        """Produces events from a parsed log record structure.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): identifier of the structure of tokens.
          structure (pyparsing.ParseResults): structure of tokens derived
              from a line of a text file.

        Raises:
          ParseError: when the structure type is unknown.
        """
        if key == 'logline':
            self._ParseRecordLogline(parser_mediator, structure)
        else:
            raise errors.ParseError(
                'Unable to parse record, unknown structure: {0:s}'.format(key))

    def VerifyStructure(self, parser_mediator, lines):
        """Checks whether the text starts with a valid Google Drive Sync line.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          lines (str): one or more lines from the text file.

        Returns:
          bool: True if this is the correct parser, False otherwise.
        """
        try:
            structure = self._GDS_LINE.parseString(lines)
        except pyparsing.ParseException as exception:
            logger.debug(
                'Not a Google Drive Sync log file: {0!s}'.format(exception))
            return False

        date_time = dfdatetime_time_elements.TimeElementsInMilliseconds()

        # The first line must also carry a date and time that converts.
        time_elements = self._GetValueFromStructure(structure, 'date_time')
        try:
            date_time.CopyFromStringISO8601(
                self._GetISO8601String(time_elements))
        except ValueError as exception:
            logger.debug(
                ('Not a Google Drive Sync log file, invalid date/time: {0!s} '
                 'with error: {1!s}').format(time_elements, exception))
            return False

        return True
Exemplo n.º 11
0
class ApacheAccessParser(text_parser.PyparsingSingleLineTextParser):
    """Apache access log file parser.

    Recognizes lines in the Common Log Format and the Combined Log Format
    as defined in https://httpd.apache.org/docs/2.4/logs.html
    """

    NAME = 'apache_access'
    DESCRIPTION = 'Apache access Parser'

    MAX_LINE_LENGTH = 2048

    # Date format [18/Sep/2011:19:18:28 -0400]
    _DATE_TIME = pyparsing.Group(
        pyparsing.Suppress('[') +
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('day') +
        pyparsing.Suppress('/') +
        text_parser.PyparsingConstants.THREE_LETTERS.setResultsName('month') +
        pyparsing.Suppress('/') +
        text_parser.PyparsingConstants.FOUR_DIGITS.setResultsName('year') +
        pyparsing.Suppress(':') +
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('hours') +
        pyparsing.Suppress(':') +
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('minutes') +
        pyparsing.Suppress(':') +
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('seconds') +
        pyparsing.Combine(
            pyparsing.oneOf(['-', '+']) + pyparsing.Word(
                pyparsing.nums, exact=4)).setResultsName('time_offset') +
        pyparsing.Suppress(']')).setResultsName('date_time')

    # Double-quoted request line, e.g. "GET /index.html HTTP/1.1".
    _HTTP_REQUEST = (pyparsing.Suppress('"') +
                     pyparsing.SkipTo('"').setResultsName('http_request') +
                     pyparsing.Suppress('"'))

    # Either an alphanumeric name or a single hyphen.
    _REMOTE_NAME = (pyparsing.Word(pyparsing.alphanums)
                    | pyparsing.Literal('-')).setResultsName('remote_name')

    # Either a single hyphen or an integer.
    _RESPONSE_BYTES = (pyparsing.Literal('-')
                       | text_parser.PyparsingConstants.INTEGER
                       ).setResultsName('response_bytes')

    _REFERER = (pyparsing.Suppress('"') +
                pyparsing.SkipTo('"').setResultsName('referer') +
                pyparsing.Suppress('"'))

    _USER_AGENT = (pyparsing.Suppress('"') +
                   pyparsing.SkipTo('"').setResultsName('user_agent') +
                   pyparsing.Suppress('"'))

    # Either an alphanumeric name or a single hyphen.
    _USER_NAME = (pyparsing.Word(pyparsing.alphanums)
                  | pyparsing.Literal('-')).setResultsName('user_name')

    # Defined in https://httpd.apache.org/docs/2.4/logs.html
    # format: "%h %l %u %t \"%r\" %>s %b"
    _COMMON_LOG_FORMAT_LINE = (
        text_parser.PyparsingConstants.IP_ADDRESS.setResultsName('ip_address')
        + _REMOTE_NAME + _USER_NAME + _DATE_TIME + _HTTP_REQUEST +
        text_parser.PyparsingConstants.INTEGER.setResultsName('response_code')
        + _RESPONSE_BYTES + pyparsing.lineEnd())

    # Defined in https://httpd.apache.org/docs/2.4/logs.html
    # format: "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\""
    _COMBINED_LOG_FORMAT_LINE = (
        text_parser.PyparsingConstants.IP_ADDRESS.setResultsName('ip_address')
        + _REMOTE_NAME + _USER_NAME + _DATE_TIME + _HTTP_REQUEST +
        text_parser.PyparsingConstants.INTEGER.setResultsName('response_code')
        + _RESPONSE_BYTES + _REFERER + _USER_AGENT + pyparsing.lineEnd())

    # The combined format is listed first; it is the common format followed
    # by the referer and user agent fields.
    LINE_STRUCTURES = [('combined_log_format', _COMBINED_LOG_FORMAT_LINE),
                       ('common_log_format', _COMMON_LOG_FORMAT_LINE)]

    _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

    # TODO: migrate function after dfdatetime issue #47 is fixed.
    def _GetISO8601String(self, structure):
        """Normalizes a parsed date and time to an ISO 8601 string.

        The date and time values in Apache access log files are formatted
        as: "[18/Sep/2011:19:18:28 -0400]".

        Args:
          structure (pyparsing.ParseResults): structure of tokens derived
              from the date and time part of a line of a text file.

        Returns:
          str: ISO 8601 date time string.

        Raises:
          ValueError: if the structure cannot be converted into a date time
              string.
        """
        month = self._GetValueFromStructure(structure, 'month')

        try:
            # Unknown month abbreviations are mapped to 0.
            month = timelib.MONTH_DICT.get(month.lower(), 0)
        except AttributeError as exception:
            raise ValueError(
                'unable to parse month with error: {0!s}.'.format(exception))

        time_offset = self._GetValueFromStructure(structure, 'time_offset')

        # The offset is a sign followed by "HHMM", for example "-0400".
        try:
            time_offset_hours = int(time_offset[1:3], 10)
            time_offset_minutes = int(time_offset[3:5], 10)
        except (IndexError, TypeError, ValueError) as exception:
            raise ValueError(
                'unable to parse time zone offset with error: {0!s}.'.format(
                    exception))

        year = self._GetValueFromStructure(structure, 'year')
        day_of_month = self._GetValueFromStructure(structure, 'day')
        hours = self._GetValueFromStructure(structure, 'hours')
        minutes = self._GetValueFromStructure(structure, 'minutes')
        seconds = self._GetValueFromStructure(structure, 'seconds')

        # Formatting None with an integer format code raises TypeError, not
        # ValueError, so both are translated into the documented ValueError.
        try:
            date_time_string = (
                '{0:04d}-{1:02d}-{2:02d}T{3:02d}:{4:02d}:{5:02d}.000000'
                '{6:s}{7:02d}:{8:02d}').format(year, month, day_of_month,
                                               hours, minutes, seconds,
                                               time_offset[0],
                                               time_offset_hours,
                                               time_offset_minutes)
        except (TypeError, ValueError) as exception:
            raise ValueError(
                'unable to format date time string with error: {0!s}.'.format(
                    exception))

        return date_time_string

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a matching entry and produces an event.

        A line whose date and time cannot be converted produces an
        extraction warning instead of an event.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): elements parsed from the file.

        Raises:
          ParseError: when the structure type is unknown.
        """
        if key not in self._SUPPORTED_KEYS:
            raise errors.ParseError(
                'Unable to parse record, unknown structure: {0:s}'.format(key))

        date_time = dfdatetime_time_elements.TimeElements()

        # This is the grouped date and time tokens, not yet a string.
        date_time_structure = self._GetValueFromStructure(
            structure, 'date_time')

        try:
            iso_date_time = self._GetISO8601String(date_time_structure)
            date_time.CopyFromStringISO8601(iso_date_time)
        except ValueError:
            parser_mediator.ProduceExtractionWarning(
                'invalid date time value: {0!s}'.format(date_time_structure))
            return

        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_RECORDED)

        event_data = ApacheAccessEventData()
        event_data.ip_address = self._GetValueFromStructure(
            structure, 'ip_address')
        event_data.remote_name = self._GetValueFromStructure(
            structure, 'remote_name')
        event_data.user_name = self._GetValueFromStructure(
            structure, 'user_name')
        event_data.http_request = self._GetValueFromStructure(
            structure, 'http_request')
        event_data.http_response_code = self._GetValueFromStructure(
            structure, 'response_code')
        event_data.http_response_bytes = self._GetValueFromStructure(
            structure, 'response_bytes')

        # Only the combined log format carries referer and user agent.
        if key == 'combined_log_format':
            event_data.http_request_referer = self._GetValueFromStructure(
                structure, 'referer')
            event_data.http_request_user_agent = self._GetValueFromStructure(
                structure, 'user_agent')

        parser_mediator.ProduceEventWithEventData(event, event_data)

    # pylint: disable=unused-argument
    def VerifyStructure(self, parser_mediator, line):
        """Verifies that this is an apache access log file.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          line (str): line from the text file.

        Returns:
          bool: True if the line matches one of the supported line
              structures, False otherwise.
        """
        return any(
            parser.matches(line) for _, parser in self.LINE_STRUCTURES)
Exemplo n.º 12
0
class APTHistoryLogParser(text_parser.PyparsingSingleLineTextParser):
    """Parses for Advanced Packaging Tool (APT) History log files.

    A record starts with a "Start-Date:" line, is followed by body lines
    such as "Commandline:" or "Install:", and is closed by an "End-Date:"
    line. State is accumulated across those lines and events are produced
    when the end of the record is reached.
    """

    NAME = 'apt_history'

    DATA_FORMAT = 'Advanced Packaging Tool (APT) History log file'

    # APT History log lines can be very long.
    MAX_LINE_LENGTH = 65536

    _ENCODING = 'utf-8'

    _HYPHEN = text_parser.PyparsingConstants.HYPHEN

    _FOUR_DIGITS = text_parser.PyparsingConstants.FOUR_DIGITS
    _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS

    # Six grouped tokens: year, month, day of month, hours, minutes, seconds.
    _APTHISTORY_DATE_TIME = pyparsing.Group(_FOUR_DIGITS + _HYPHEN +
                                            _TWO_DIGITS + _HYPHEN +
                                            _TWO_DIGITS + _TWO_DIGITS +
                                            pyparsing.Suppress(':') +
                                            _TWO_DIGITS +
                                            pyparsing.Suppress(':') +
                                            _TWO_DIGITS)

    _RECORD_START = (
        # APT History logs may start with empty lines
        pyparsing.ZeroOrMore(pyparsing.lineEnd()) +
        pyparsing.Literal('Start-Date:') +
        _APTHISTORY_DATE_TIME.setResultsName('start_date') +
        pyparsing.lineEnd())

    _RECORD_BODY = (pyparsing.MatchFirst([
        pyparsing.Literal('Commandline:'),
        pyparsing.Literal('Downgrade:'),
        pyparsing.Literal('Error:'),
        pyparsing.Literal('Install:'),
        pyparsing.Literal('Purge:'),
        pyparsing.Literal('Remove:'),
        pyparsing.Literal('Requested-By:'),
        pyparsing.Literal('Upgrade:')
    ]) + pyparsing.restOfLine())

    _RECORD_END = (pyparsing.Literal('End-Date:') +
                   _APTHISTORY_DATE_TIME.setResultsName('end_date') +
                   pyparsing.OneOrMore(pyparsing.lineEnd()))

    LINE_STRUCTURES = [('record_start', _RECORD_START),
                       ('record_body', _RECORD_BODY),
                       ('record_end', _RECORD_END)]

    def __init__(self):
        """Initializes an APT History parser."""
        super(APTHistoryLogParser, self).__init__()
        self._date_time = None
        self._event_data = None
        self._downgrade = None
        self._install = None
        self._purge = None
        self._remove = None
        self._upgrade = None

    @staticmethod
    def _BuildDateTime(time_elements_structure):
        """Builds time elements from an APT History time stamp.

        Args:
          time_elements_structure (pyparsing.ParseResults): structure of
              tokens derived from an APT History time stamp.

        Returns:
          dfdatetime.TimeElements: date and time extracted from the structure
              or None if the structure does not represent a valid string.
        """
        # Ensure time_elements_tuple is not a pyparsing.ParseResults otherwise
        # copy.deepcopy() of the dfDateTime object will fail on Python 3.8
        # with: "TypeError: 'str' object is not callable" due to
        # pyparsing.ParseResults overriding __getattr__ with a function that
        # returns an empty string when named token does not exists.
        try:
            # Unpacking None or a wrongly sized sequence raises TypeError or
            # ValueError respectively, both handled below.
            year, month, day_of_month, hours, minutes, seconds = (
                time_elements_structure)

            date_time = dfdatetime_time_elements.TimeElements(
                time_elements_tuple=(year, month, day_of_month, hours, minutes,
                                     seconds))

            # APT History logs store date and time values in local time.
            date_time.is_local_time = True
            return date_time
        except (TypeError, ValueError):
            return None

    def _ParseRecordStart(self, parser_mediator, structure):
        """Parses the first line of a log record.

        On success the record date and time and a fresh event data object
        are stored on the parser. On failure an extraction warning is
        produced and no event data is created.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          structure (pyparsing.ParseResults): structure of tokens derived
              from a log entry.
        """
        start_date = structure.get('start_date', None)
        self._date_time = self._BuildDateTime(start_date)
        if not self._date_time:
            # Report the raw tokens; self._date_time is None at this point.
            parser_mediator.ProduceExtractionWarning(
                'invalid date time value: {0!s}'.format(start_date))
            return

        self._event_data = APTHistoryLogEventData()

    def _ParseRecordBody(self, structure):
        """Parses a line from the body of a log record.

        The whole line, keyword included, is stored either on the event data
        (command, error, requester) or on the parser (package lists).

        Args:
          structure (pyparsing.ParseResults): structure of tokens derived
              from a log entry.

        Raises:
          ParseError: when the date and time value is missing.
        """
        if not self._date_time:
            raise errors.ParseError('Missing date time value.')

        keyword = structure[0]

        # Command data
        if keyword == 'Commandline:':
            self._event_data.command = ''.join(structure)

        elif keyword == 'Error:':
            self._event_data.error = ''.join(structure)

        elif keyword == 'Requested-By:':
            self._event_data.requester = ''.join(structure)

        # Package lists
        elif keyword == 'Downgrade:':
            self._downgrade = ''.join(structure)

        elif keyword == 'Install:':
            self._install = ''.join(structure)

        elif keyword == 'Purge:':
            self._purge = ''.join(structure)

        elif keyword == 'Remove:':
            self._remove = ''.join(structure)

        elif keyword == 'Upgrade:':
            self._upgrade = ''.join(structure)

    def _ParseRecordEnd(self, parser_mediator):
        """Parses the last line of a log record.

        Produces one event for every package list that was collected for the
        record, all sharing the date and time of the record start.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.

        Raises:
          ParseError: when the date and time value is missing.
        """
        if not self._date_time:
            raise errors.ParseError('Missing date time value.')

        # Package list paired with the time description of its event, in the
        # order in which the events are produced.
        package_actions = (
            (self._downgrade, definitions.TIME_DESCRIPTION_DOWNGRADE),
            (self._install, definitions.TIME_DESCRIPTION_INSTALLATION),
            (self._purge, definitions.TIME_DESCRIPTION_DELETED),
            (self._remove, definitions.TIME_DESCRIPTION_DELETED),
            (self._upgrade, definitions.TIME_DESCRIPTION_UPDATE))

        # NOTE(review): the same event data object is reused for every event
        # of a record with only `packages` changed - confirm the mediator
        # does not keep a reference to it between calls.
        for packages, time_description in package_actions:
            if not packages:
                continue

            self._event_data.packages = packages
            event = time_events.DateTimeValuesEvent(
                self._date_time,
                time_description,
                time_zone=parser_mediator.timezone)
            parser_mediator.ProduceEventWithEventData(event, self._event_data)

    def _ResetState(self):
        """Resets stored values in the parser."""
        self._date_time = None
        self._downgrade = None
        self._event_data = None
        self._install = None
        self._purge = None
        self._remove = None
        self._upgrade = None

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a log record structure and produces events.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): identifier of the structure of tokens.
          structure (pyparsing.ParseResults): structure of tokens derived
              from a log entry.

        Raises:
          ParseError: when the structure type is unknown.
        """
        if key == 'record_start':
            self._ParseRecordStart(parser_mediator, structure)
            return

        if key == 'record_body':
            self._ParseRecordBody(structure)
            return

        if key == 'record_end':
            self._ParseRecordEnd(parser_mediator)
            # Reset for next record.
            self._ResetState()
            return

        raise errors.ParseError(
            'Unable to parse record, unknown structure: {0:s}'.format(key))

    def VerifyStructure(self, parser_mediator, line):
        """Verify that this file is an APT History log file.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          line (str): single line from the text file.

        Returns:
          bool: True if this is the correct parser, False otherwise.
        """
        try:
            self._RECORD_START.parseString(line)
        except pyparsing.ParseException as exception:
            logger.debug(
                'Not an APT History log file: {0!s}'.format(exception))
            return False

        # Reset stored values for parsing a new file.
        self._ResetState()
        return True
Exemplo n.º 13
0
class SyslogParser(text_parser.PyparsingMultiLineTextParser):
  """Parses syslog formatted log files.

  Syslog timestamps carry no year, so the parser tracks the last observed
  month and derives the year to use from the mediator.
  """
  NAME = u'syslog'

  DESCRIPTION = u'Syslog Parser'

  _ENCODING = u'utf-8'

  # Matches the leading "Mmm d hh:mm:ss " timestamp of a syslog line.
  _VERIFICATION_REGEX = re.compile(r'^\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2}\s')

  _plugin_classes = {}

  # The reporter and facility fields can contain any printable character, but
  # to allow for processing of syslog formats that delimit the reporter and
  # facility with printable characters, we remove certain common delimiters
  # from the set of printable characters.
  _REPORTER_CHARACTERS = u''.join(
      [c for c in pyparsing.printables if c not in [u':', u'[', u'<']])
  _FACILITY_CHARACTERS = u''.join(
      [c for c in pyparsing.printables if c not in [u':', u'>']])

  _PYPARSING_COMPONENTS = {
      u'month': text_parser.PyparsingConstants.MONTH.setResultsName(u'month'),
      u'day': text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName(
          u'day'),
      u'hour': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          u'hour'),
      u'minute': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          u'minute'),
      u'second': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          u'second'),
      u'fractional_seconds': pyparsing.Word(pyparsing.nums).setResultsName(
          u'fractional_seconds'),
      u'hostname': pyparsing.Word(pyparsing.printables).setResultsName(
          u'hostname'),
      u'reporter': pyparsing.Word(_REPORTER_CHARACTERS).setResultsName(
          u'reporter'),
      u'pid': text_parser.PyparsingConstants.PID.setResultsName(u'pid'),
      u'facility': pyparsing.Word(_FACILITY_CHARACTERS).setResultsName(
          u'facility'),
      # The body runs, possibly across lines, up to the end of the text or
      # to the next line that starts with a timestamp.
      u'body': pyparsing.Regex(
          r'.*?(?=($|\n\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2}))',
          re.DOTALL).setResultsName(u'body'),
      u'comment_body': pyparsing.SkipTo(u' ---').setResultsName(
          u'body')
  }

  _PYPARSING_COMPONENTS[u'date'] = (
      _PYPARSING_COMPONENTS[u'month'] +
      _PYPARSING_COMPONENTS[u'day'] +
      _PYPARSING_COMPONENTS[u'hour'] + pyparsing.Suppress(u':') +
      _PYPARSING_COMPONENTS[u'minute'] + pyparsing.Suppress(u':') +
      _PYPARSING_COMPONENTS[u'second'] + pyparsing.Optional(
          pyparsing.Suppress(u'.') +
          _PYPARSING_COMPONENTS[u'fractional_seconds']))

  _SYSLOG_LINE = (
      _PYPARSING_COMPONENTS[u'date'] +
      _PYPARSING_COMPONENTS[u'hostname'] +
      _PYPARSING_COMPONENTS[u'reporter'] +
      pyparsing.Optional(
          pyparsing.Suppress(u'[') + _PYPARSING_COMPONENTS[u'pid'] +
          pyparsing.Suppress(u']')) +
      pyparsing.Optional(
          pyparsing.Suppress(u'<') + _PYPARSING_COMPONENTS[u'facility'] +
          pyparsing.Suppress(u'>')) +
      pyparsing.Optional(pyparsing.Suppress(u':')) +
      _PYPARSING_COMPONENTS[u'body'] + pyparsing.lineEnd())

  _SYSLOG_COMMENT = (
      _PYPARSING_COMPONENTS[u'date'] + pyparsing.Suppress(u':') +
      pyparsing.Suppress(u'---') + _PYPARSING_COMPONENTS[u'comment_body'] +
      pyparsing.Suppress(u'---') + pyparsing.LineEnd())

  # Variant without a hostname where the reporter is the literal "kernel".
  _KERNEL_SYSLOG_LINE = (
      _PYPARSING_COMPONENTS[u'date'] +
      pyparsing.Literal(u'kernel').setResultsName(u'reporter') +
      pyparsing.Suppress(u':') + _PYPARSING_COMPONENTS[u'body'] +
      pyparsing.lineEnd())

  # Both line grammars share the 'syslog_line' key and are handled alike.
  LINE_STRUCTURES = [
      (u'syslog_line', _SYSLOG_LINE),
      (u'syslog_line', _KERNEL_SYSLOG_LINE),
      (u'syslog_comment', _SYSLOG_COMMENT)]

  _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

  def __init__(self):
    """Initializes a parser object."""
    super(SyslogParser, self).__init__()
    self._last_month = 0
    self._maximum_year = 0
    self._plugin_objects_by_reporter = {}
    self._year_use = 0

  def _UpdateYear(self, mediator, month):
    """Updates the year to use for events, based on last observed month.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      month (int): month observed by the parser, where January is 1.
    """
    # A value of 0 means "not determined yet".
    if not self._year_use:
      self._year_use = mediator.GetEstimatedYear()
    if not self._maximum_year:
      self._maximum_year = mediator.GetLatestYear()

    if not self._last_month:
      self._last_month = month
      return

    # Some syslog daemons allow out-of-order sequences, so allow some leeway
    # to not cause Apr->May->Apr to cause the year to increment.
    # See http://bugzilla.adiscon.com/show_bug.cgi?id=527
    if self._last_month > (month + 1):
      # The year is not incremented once it equals the maximum year.
      if self._year_use != self._maximum_year:
        self._year_use += 1
    self._last_month = month

  def EnablePlugins(self, plugin_includes):
    """Enables parser plugins.

    Args:
      plugin_includes (list[str]): names of the plugins to enable, where None
          or an empty list represents all plugins. Note that the default plugin
          is handled separately.
    """
    super(SyslogParser, self).EnablePlugins(plugin_includes)

    # Index the enabled plugins by the reporter they handle.
    self._plugin_objects_by_reporter = {}
    for plugin_object in self._plugin_objects:
      self._plugin_objects_by_reporter[plugin_object.REPORTER] = plugin_object

  def ParseRecord(self, mediator, key, structure):
    """Parses a matching entry.

    Comment lines produce a SyslogCommentEvent. Other lines are handed to the
    plugin registered for their reporter, falling back to a SyslogLineEvent
    when there is no such plugin or the plugin raises WrongPlugin.

    Args:
      mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): elements parsed from the file.

    Raises:
      UnableToParseFile: if an unsupported key is provided.
    """
    if key not in self._SUPPORTED_KEYS:
      raise errors.UnableToParseFile(u'Unsupported key: {0:s}'.format(key))

    month = timelib.MONTH_DICT.get(structure.month.lower(), None)
    if not month:
      # Report the text that failed the lookup; month itself is None here
      # and formatting None with a string format code raises TypeError.
      mediator.ProduceParserError(
          u'Invalid month value: {0!s}'.format(structure.month))
      return

    self._UpdateYear(mediator, month)
    timestamp = timelib.Timestamp.FromTimeParts(
        year=self._year_use, month=month, day=structure.day,
        hour=structure.hour, minutes=structure.minute,
        seconds=structure.second, timezone=mediator.timezone)

    if key == u'syslog_comment':
      comment_attributes = {u'body': structure.body}
      event = SyslogCommentEvent(timestamp, 0, comment_attributes)
      mediator.ProduceEvent(event)
      return

    reporter = structure.reporter
    attributes = {
        u'hostname': structure.hostname,
        u'reporter': reporter,
        u'pid': structure.pid,
        u'body': structure.body}

    plugin_object = self._plugin_objects_by_reporter.get(reporter, None)
    if not plugin_object:
      event_object = SyslogLineEvent(timestamp, 0, attributes)
      mediator.ProduceEvent(event_object)

    else:
      try:
        plugin_object.Process(mediator, timestamp, attributes)

      except errors.WrongPlugin:
        event_object = SyslogLineEvent(timestamp, 0, attributes)
        mediator.ProduceEvent(event_object)

  def VerifyStructure(self, unused_mediator, line):
    """Verifies that this is a syslog-formatted file.

    Args:
      unused_mediator (ParserMediator): mediates the interactions between
          parsers and other components, such as storage and abort signals.
      line (str): single line from the text file.

    Returns:
      bool: whether the line appears to contain syslog content.
    """
    return self._VERIFICATION_REGEX.match(line) is not None
Exemplo n.º 14
0
class SyslogParser(text_parser.PyparsingMultiLineTextParser):
    """Parses syslog formatted log files.

    Syslog timestamps carry no year, so the parser tracks the last observed
    month and derives the year to use from the parser mediator.
    """
    NAME = u'syslog'

    DESCRIPTION = u'Syslog Parser'

    # Matches the leading "Mmm dd hh:mm:ss " timestamp of a syslog line.
    _VERIFICATION_REGEX = re.compile(r'^\w{3}\s\d{2}\s\d{2}:\d{2}:\d{2}\s')

    _plugin_classes = {}

    _PYPARSING_COMPONENTS = {
        u'month':
        text_parser.PyparsingConstants.MONTH.setResultsName(u'month'),
        u'day':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(u'day'),
        u'hour':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(u'hour'),
        u'minute':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(u'minute'),
        u'second':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(u'second'),
        u'fractional_seconds':
        pyparsing.Word(pyparsing.nums).setResultsName(u'fractional_seconds'),
        u'hostname':
        pyparsing.Word(pyparsing.printables).setResultsName(u'hostname'),
        u'reporter':
        pyparsing.Word(pyparsing.alphanums + u'.').setResultsName(u'reporter'),
        u'pid':
        text_parser.PyparsingConstants.PID.setResultsName(u'pid'),
        u'facility':
        pyparsing.Word(pyparsing.alphanums).setResultsName(u'facility'),
        # The body runs, possibly across lines, up to the end of the text or
        # to the next line that starts with a timestamp.
        u'body':
        pyparsing.Regex(r'.*?(?=($|\n\w{3}\s\d{2}\s\d{2}:\d{2}:\d{2}))',
                        re.DOTALL).setResultsName(u'body'),
        u'comment_body':
        pyparsing.SkipTo(u' ---').setResultsName(u'body')
    }

    _PYPARSING_COMPONENTS[u'date'] = (
        _PYPARSING_COMPONENTS[u'month'] + _PYPARSING_COMPONENTS[u'day'] +
        _PYPARSING_COMPONENTS[u'hour'] + pyparsing.Suppress(u':') +
        _PYPARSING_COMPONENTS[u'minute'] + pyparsing.Suppress(u':') +
        _PYPARSING_COMPONENTS[u'second'] + pyparsing.Optional(
            pyparsing.Suppress(u'.') +
            _PYPARSING_COMPONENTS[u'fractional_seconds']))

    _LINE_GRAMMAR = (
        _PYPARSING_COMPONENTS[u'date'] + _PYPARSING_COMPONENTS[u'hostname'] +
        _PYPARSING_COMPONENTS[u'reporter'] + pyparsing.Optional(
            pyparsing.Suppress(u'[') + _PYPARSING_COMPONENTS[u'pid'] +
            pyparsing.Suppress(u']')) + pyparsing.Optional(
                pyparsing.Suppress(u'<') + _PYPARSING_COMPONENTS[u'facility'] +
                pyparsing.Suppress(u'>')) +
        pyparsing.Optional(pyparsing.Suppress(u':')) +
        _PYPARSING_COMPONENTS[u'body'] + pyparsing.lineEnd())

    _SYSLOG_COMMENT = (_PYPARSING_COMPONENTS[u'date'] +
                       pyparsing.Suppress(u':') + pyparsing.Suppress(u'---') +
                       _PYPARSING_COMPONENTS[u'comment_body'] +
                       pyparsing.Suppress(u'---') + pyparsing.LineEnd())

    LINE_STRUCTURES = [(u'syslog_line', _LINE_GRAMMAR),
                       (u'syslog_comment', _SYSLOG_COMMENT)]

    _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

    def __init__(self):
        """Initializes a parser object."""
        super(SyslogParser, self).__init__()
        self._last_month = 0
        self._maximum_year = 0
        self._plugin_objects_by_reporter = {}
        self._year_use = 0

    def _UpdateYear(self, parser_mediator, month):
        """Updates the year to use for events, based on last observed month.

        Args:
          parser_mediator: a parser mediator object (instance of
                           ParserMediator).
          month: an integer containing the month observed by the parser,
                 where January is 1.
        """
        # A value of 0 means "not determined yet".
        if not self._year_use:
            self._year_use = parser_mediator.GetEstimatedYear()
        if not self._maximum_year:
            self._maximum_year = parser_mediator.GetLatestYear()

        if not self._last_month:
            self._last_month = month
            return

        # Some syslog daemons allow out-of-order sequences, so allow some leeway
        # to not cause Apr->May->Apr to cause the year to increment.
        # See http://bugzilla.adiscon.com/show_bug.cgi?id=527
        if self._last_month > (month + 1):
            # The year is not incremented once it equals the maximum year.
            if self._year_use != self._maximum_year:
                self._year_use += 1
        self._last_month = month

    def EnablePlugins(self, plugin_includes):
        """Enables parser plugins.

        Args:
          plugin_includes: a list of strings containing the names of the
                           plugins to enable, where None or an empty list
                           represents all plugins. Note that the default
                           plugin is handled separately.
        """
        super(SyslogParser, self).EnablePlugins(plugin_includes)

        # Index the enabled plugins by the reporter they handle.
        self._plugin_objects_by_reporter = {}
        for plugin_object in self._plugin_objects:
            self._plugin_objects_by_reporter[
                plugin_object.REPORTER] = plugin_object

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a matching entry.

        Comment lines produce a SyslogCommentEvent. Other lines are handed to
        the plugin registered for their reporter, falling back to a
        SyslogLineEvent when there is no such plugin or the plugin raises
        WrongPlugin.

        Args:
          parser_mediator: a parser mediator object (instance of
                           ParserMediator).
          key: a string containing the name of the parsed structure.
          structure: the elements parsed from the file (instance of
                     pyparsing.ParseResults).

        Raises:
          UnableToParseFile: if an unsupported key is provided.
        """
        if key not in self._SUPPORTED_KEYS:
            raise errors.UnableToParseFile(
                u'Unsupported key: {0:s}'.format(key))

        month = timelib.MONTH_DICT.get(structure.month.lower(), None)
        if not month:
            # Report the text that failed the lookup; month itself is None
            # here and formatting None with a string format code raises
            # TypeError.
            parser_mediator.ProduceParserError(
                u'Invalid month value: {0!s}'.format(structure.month))
            return

        self._UpdateYear(parser_mediator, month)
        timestamp = timelib.Timestamp.FromTimeParts(
            year=self._year_use,
            month=month,
            day=structure.day,
            hour=structure.hour,
            minutes=structure.minute,
            seconds=structure.second,
            timezone=parser_mediator.timezone)

        if key == u'syslog_comment':
            # Comments have no hostname, reporter or pid; empty strings are
            # supplied for those attributes.
            comment_attributes = {
                u'hostname': u'',
                u'reporter': u'',
                u'pid': u'',
                u'body': structure.body
            }
            event = SyslogCommentEvent(timestamp, 0, comment_attributes)
            parser_mediator.ProduceEvent(event)
            return

        reporter = structure.reporter
        attributes = {
            u'hostname': structure.hostname,
            u'reporter': reporter,
            u'pid': structure.pid,
            u'body': structure.body
        }

        plugin_object = self._plugin_objects_by_reporter.get(reporter, None)
        if not plugin_object:
            event_object = SyslogLineEvent(timestamp, 0, attributes)
            parser_mediator.ProduceEvent(event_object)

        else:
            try:
                plugin_object.Process(parser_mediator, timestamp, attributes)

            except errors.WrongPlugin:
                event_object = SyslogLineEvent(timestamp, 0, attributes)
                parser_mediator.ProduceEvent(event_object)

    def VerifyStructure(self, parser_mediator, lines):
        """Verifies that this is a syslog-formatted file.

        Args:
          parser_mediator: a parser mediator object (instance of
                           ParserMediator).
          lines: a buffer that contains content from the file.

        Returns:
          A boolean value to indicate that passed buffer appears to contain
          syslog content.
        """
        return self._VERIFICATION_REGEX.match(lines) is not None
Exemplo n.º 15
0
class SetupapiLogParser(text_parser.PyparsingMultiLineTextParser):
    """Parses events from Windows Setupapi log files.

    A complete log section (header, body and footer) is matched as a single
    multi-line record by the 'logline' structure.
    """

    NAME = 'setupapi'

    DESCRIPTION = 'Parser for Windows Setupapi log files.'

    _ENCODING = 'utf-8'

    # Increase the buffer size, as log messages can be very long.
    BUFFER_SIZE = 262144

    # Date separator; suppressed so it does not show up in the parse results.
    _SLASH = pyparsing.Literal('/').suppress()

    # Local aliases for the shared fixed-width digit expressions.
    _FOUR_DIGITS = text_parser.PyparsingConstants.FOUR_DIGITS
    _THREE_DIGITS = text_parser.PyparsingConstants.THREE_DIGITS
    _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS

    # Date and time value: four digits, '/', two digits, '/', two digits,
    # followed by three two-digit values separated by ':', a single '.' or ','
    # and three digits. The separators are suppressed and the remaining tokens
    # are grouped, so the group can be handed over as one time elements tuple
    # (presumably year, month, day, hours, minutes, seconds, milliseconds -
    # confirm against a sample log).
    _SETUPAPI_DATE_TIME = pyparsing.Group(
        _FOUR_DIGITS + _SLASH + _TWO_DIGITS + _SLASH + _TWO_DIGITS +
        _TWO_DIGITS + pyparsing.Suppress(':') + _TWO_DIGITS +
        pyparsing.Suppress(':') + _TWO_DIGITS +
        pyparsing.Word('.,', exact=1).suppress() + _THREE_DIGITS)

    # One complete log section, matched in this order:
    #   * everything up to and including '>>>  [' is discarded;
    #   * the text up to ']' is captured as 'entry_type';
    #   * everything up to and including '>>>  Section start' is discarded;
    #   * the section start date and time is captured as 'start_time';
    #   * the text up to '<<<  Section end ' is captured as 'message';
    #   * the parser advances to column 17 and captures 'end_time';
    #   * everything up to and including '<<<  [Exit status: ' is discarded;
    #   * the text up to ']' is captured as 'entry_status';
    #   * the remainder of the line and any trailing line ends are consumed.
    _SETUPAPI_LINE = (
        pyparsing.SkipTo('>>>  [', include=True).suppress() +
        pyparsing.SkipTo(']').setResultsName('entry_type') +
        pyparsing.SkipTo('>>>  Section start', include=True).suppress() +
        _SETUPAPI_DATE_TIME.setResultsName('start_time') +
        pyparsing.SkipTo('<<<  Section end ').setResultsName('message') +
        pyparsing.GoToColumn(17) +
        _SETUPAPI_DATE_TIME.setResultsName('end_time') +
        pyparsing.SkipTo('<<<  [Exit status: ', include=True).suppress() +
        pyparsing.SkipTo(']').setResultsName('entry_status') +
        pyparsing.SkipTo(pyparsing.lineEnd()) +
        pyparsing.ZeroOrMore(pyparsing.lineEnd()))

    # Structure name / grammar pairs; ParseRecord only accepts 'logline'.
    LINE_STRUCTURES = [
        ('logline', _SETUPAPI_LINE),
    ]

    def _ParseRecordLogline(self, parser_mediator, structure):
        """Produces the start and end events of a Setupapi log section.

        An extraction warning is produced, and parsing of the section stops,
        as soon as one of the two date and time values cannot be converted.
        The start event is therefore still produced when only the end value
        is invalid.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          structure (pyparsing.ParseResults): structure of tokens derived
              from log entry.
        """
        local_time_zone = parser_mediator.timezone

        start_elements = self._GetValueFromStructure(structure, 'start_time')
        try:
            start_date_time = (
                dfdatetime_time_elements.TimeElementsInMilliseconds(
                    time_elements_tuple=start_elements))
            # Setupapi logs store date and time values in local time.
            start_date_time.is_local_time = True
        except ValueError:
            parser_mediator.ProduceExtractionWarning(
                'invalid date time value: {0!s}'.format(start_elements))
            return

        event_data = SetupapiLogEventData()
        event_data.entry_type = self._GetValueFromStructure(
            structure, 'entry_type')
        # The event for the start of the section carries a fixed status.
        event_data.entry_status = 'START'

        start_event = time_events.DateTimeValuesEvent(
            start_date_time, definitions.TIME_DESCRIPTION_START,
            time_zone=local_time_zone)
        parser_mediator.ProduceEventWithEventData(start_event, event_data)

        # The same event data object is reused for the end of the section,
        # now with the exit status taken from the section footer.
        event_data.entry_status = self._GetValueFromStructure(
            structure, 'entry_status')

        end_elements = self._GetValueFromStructure(structure, 'end_time')
        try:
            end_date_time = (
                dfdatetime_time_elements.TimeElementsInMilliseconds(
                    time_elements_tuple=end_elements))
            end_date_time.is_local_time = True
        except ValueError:
            parser_mediator.ProduceExtractionWarning(
                'invalid date time value: {0!s}'.format(end_elements))
            return

        end_event = time_events.DateTimeValuesEvent(
            end_date_time, definitions.TIME_DESCRIPTION_END,
            time_zone=local_time_zone)
        parser_mediator.ProduceEventWithEventData(end_event, event_data)

    def ParseRecord(self, parser_mediator, key, structure):
        """Dispatches a matched structure to its record parser.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): identifier of the structure of tokens.
          structure (pyparsing.ParseResults): structure of tokens derived
              from a log entry.

        Raises:
          ParseError: when the structure type is unknown.
        """
        if key == 'logline':
            self._ParseRecordLogline(parser_mediator, structure)
            return

        raise errors.ParseError(
            'Unable to parse record, unknown structure: {0:s}'.format(key))

    def VerifyStructure(self, parser_mediator, lines):
        """Determines whether the text belongs to a Windows Setupapi log file.

        The text must match the section grammar and the section start date
        and time must be convertible into a date time value.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          lines (str): one or more lines from the text file.

        Returns:
          bool: True if this is the correct parser, False otherwise.
        """
        try:
            parsed_section = self._SETUPAPI_LINE.parseString(lines)
        except pyparsing.ParseException as exception:
            logger.debug(
                'Not a Windows Setupapi log file: {0!s}'.format(exception))
            return False

        start_elements = self._GetValueFromStructure(
            parsed_section, 'start_time')

        try:
            start_date_time = (
                dfdatetime_time_elements.TimeElementsInMilliseconds(
                    time_elements_tuple=start_elements))
        except ValueError as exception:
            message = (
                'Not a Windows Setupapi log file, invalid date/time: {0!s} '
                'with error: {1!s}').format(start_elements, exception)
            logger.debug(message)
            return False

        if start_date_time:
            return True

        logger.debug(
            ('Not a Windows Setupapi log file, '
             'invalid date/time: {0!s}').format(start_elements))
        return False
Exemplo n.º 16
0
class SyslogParser(text_parser.PyparsingMultiLineTextParser):
    """Parses syslog formatted log files.

    Grammars are defined for Chrome OS, rsyslog (RFC-3339 timestamps),
    traditional rsyslog (yearless timestamps), kernel lines and syslog
    comment lines.
    """
    NAME = 'syslog'

    DESCRIPTION = 'Syslog Parser'

    _ENCODING = 'utf-8'

    # Class-level plugin registry; how it is populated is not visible in this
    # part of the file.
    _plugin_classes = {}

    # The reporter and facility fields can contain any printable character, but
    # to allow for processing of syslog formats that delimit the reporter and
    # facility with printable characters, we remove certain common delimiters
    # from the set of printable characters.
    _REPORTER_CHARACTERS = ''.join(
        [c for c in pyparsing.printables if c not in [':', '[', '<']])
    _FACILITY_CHARACTERS = ''.join(
        [c for c in pyparsing.printables if c not in [':', '>']])

    # Severity keywords accepted by the 'severity' grammar component.
    _SYSLOG_SEVERITY = [
        'EMERG', 'ALERT', 'CRIT', 'ERR', 'WARNING', 'NOTICE', 'INFO', 'DEBUG'
    ]

    # Lazily matches the message body up to the end of the buffer or up to a
    # newline that is followed by what looks like the timestamp of the next
    # entry (either the yearless "Mmm dd hh:mm:ss" form or the RFC-3339 form).
    # NOTE(review): inside the character class [\+|-] the '|' is a literal, so
    # a '|' is accepted as UTC offset sign as well - confirm whether [+-] was
    # intended. The same class is used in the verification patterns below.
    # TODO: change pattern to allow only spaces as a field separator.
    _BODY_PATTERN = (
        r'.*?(?=($|\n\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2})|' \
        r'($|\n\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}' \
        r'[\+|-]\d{2}:\d{2}\s))')

    # The rsyslog file format (RSYSLOG_FileFormat) consists of:
    # %TIMESTAMP% %HOSTNAME% %syslogtag%%msg%
    #
    # Where %TIMESTAMP% is in RFC-3339 date time format e.g.
    # 2020-05-31T00:00:45.698463+00:00
    _RSYSLOG_VERIFICATION_PATTERN = (r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.'
                                     r'\d{6}[\+|-]\d{2}:\d{2} ' +
                                     _BODY_PATTERN)

    # The rsyslog traditional file format (RSYSLOG_TraditionalFileFormat)
    # consists of:
    # %TIMESTAMP% %HOSTNAME% %syslogtag%%msg%
    #
    # Where %TIMESTAMP% is in yearless ctime date time format e.g.
    # Jan 22 07:54:32
    # TODO: change pattern to allow only spaces as a field separator.
    _RSYSLOG_TRADITIONAL_VERIFICATION_PATTERN = (
        r'^\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2}\s' + _BODY_PATTERN)

    # The Chrome OS syslog messages are of a format beginning with an
    # ISO 8601 combined date and time expression with timezone designator:
    #   2016-10-25T12:37:23.297265-07:00
    #
    # This will then be followed by the SYSLOG Severity which will be one of:
    #   EMERG,ALERT,CRIT,ERR,WARNING,NOTICE,INFO,DEBUG
    #
    # 2016-10-25T12:37:23.297265-07:00 INFO
    _CHROMEOS_VERIFICATION_PATTERN = (
        r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.'
        r'\d{6}[\+|-]\d{2}:\d{2}\s'
        r'(EMERG|ALERT|CRIT|ERR|WARNING|NOTICE|INFO|DEBUG)' + _BODY_PATTERN)

    # Bundle all verification patterns into a single regular expression.
    _VERIFICATION_REGEX = re.compile('({0:s})'.format('|'.join([
        _CHROMEOS_VERIFICATION_PATTERN, _RSYSLOG_VERIFICATION_PATTERN,
        _RSYSLOG_TRADITIONAL_VERIFICATION_PATTERN
    ])))

    # Named building blocks shared by the line grammars below. The dictionary
    # key doubles as the results name, except for 'comment_body', which is
    # stored under the results name 'body'.
    _PYPARSING_COMPONENTS = {
        'year':
        text_parser.PyparsingConstants.FOUR_DIGITS.setResultsName('year'),
        'two_digit_month':
        (text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
            'two_digit_month')),
        'month':
        text_parser.PyparsingConstants.MONTH.setResultsName('month'),
        'day':
        text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName('day'),
        'hour':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('hour'),
        'minute':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('minute'),
        'second':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('second'),
        'fractional_seconds':
        pyparsing.Word(pyparsing.nums).setResultsName('fractional_seconds'),
        'hostname':
        pyparsing.Word(pyparsing.printables).setResultsName('hostname'),
        'reporter':
        pyparsing.Word(_REPORTER_CHARACTERS).setResultsName('reporter'),
        'pid':
        text_parser.PyparsingConstants.PID.setResultsName('pid'),
        'facility':
        pyparsing.Word(_FACILITY_CHARACTERS).setResultsName('facility'),
        'severity':
        pyparsing.oneOf(_SYSLOG_SEVERITY).setResultsName('severity'),
        'body':
        pyparsing.Regex(_BODY_PATTERN, re.DOTALL).setResultsName('body'),
        'comment_body':
        pyparsing.SkipTo(' ---').setResultsName('body')
    }

    # Yearless date and time: month, day, hour:minute:second and optional
    # fractional seconds after a '.'.
    _PYPARSING_COMPONENTS['date'] = (
        _PYPARSING_COMPONENTS['month'] + _PYPARSING_COMPONENTS['day'] +
        _PYPARSING_COMPONENTS['hour'] + pyparsing.Suppress(':') +
        _PYPARSING_COMPONENTS['minute'] + pyparsing.Suppress(':') +
        _PYPARSING_COMPONENTS['second'] + pyparsing.Optional(
            pyparsing.Suppress('.') +
            _PYPARSING_COMPONENTS['fractional_seconds']))

    # Date and time with 6 fractional digits and a UTC offset, combined into
    # one string token; the minutes part of the offset is optional.
    _PYPARSING_COMPONENTS['rfc3339_datetime'] = pyparsing.Combine(
        pyparsing.Word(pyparsing.nums, exact=4) + pyparsing.Literal('-') +
        pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal('-') +
        pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal('T') +
        pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal(':') +
        pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal(':') +
        pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Literal('.') +
        pyparsing.Word(pyparsing.nums, exact=6) + pyparsing.oneOf(['-', '+']) +
        pyparsing.Word(pyparsing.nums, exact=2) + pyparsing.Optional(
            pyparsing.Literal(':') + pyparsing.Word(pyparsing.nums, exact=2)),
        joinString='',
        adjacent=True)

    # Chrome OS: datetime, severity, reporter, optional [pid] and body.
    _CHROMEOS_SYSLOG_LINE = (
        _PYPARSING_COMPONENTS['rfc3339_datetime'].setResultsName('datetime') +
        _PYPARSING_COMPONENTS['severity'] + _PYPARSING_COMPONENTS['reporter'] +
        pyparsing.Optional(pyparsing.Suppress(':')) + pyparsing.Optional(
            pyparsing.Suppress('[') + _PYPARSING_COMPONENTS['pid'] +
            pyparsing.Suppress(']')) +
        pyparsing.Optional(pyparsing.Suppress(':')) +
        _PYPARSING_COMPONENTS['body'] + pyparsing.lineEnd())

    # rsyslog: datetime, hostname, reporter, optional [pid], optional
    # <facility> and body.
    _RSYSLOG_LINE = (
        _PYPARSING_COMPONENTS['rfc3339_datetime'].setResultsName('datetime') +
        _PYPARSING_COMPONENTS['hostname'] + _PYPARSING_COMPONENTS['reporter'] +
        pyparsing.Optional(
            pyparsing.Suppress('[') + _PYPARSING_COMPONENTS['pid'] +
            pyparsing.Suppress(']')) + pyparsing.Optional(
                pyparsing.Suppress('<') + _PYPARSING_COMPONENTS['facility'] +
                pyparsing.Suppress('>')) +
        pyparsing.Optional(pyparsing.Suppress(':')) +
        _PYPARSING_COMPONENTS['body'] + pyparsing.lineEnd())

    # Traditional rsyslog: same layout as _RSYSLOG_LINE but starting with the
    # yearless date.
    _RSYSLOG_TRADITIONAL_LINE = (
        _PYPARSING_COMPONENTS['date'] + _PYPARSING_COMPONENTS['hostname'] +
        _PYPARSING_COMPONENTS['reporter'] + pyparsing.Optional(
            pyparsing.Suppress('[') + _PYPARSING_COMPONENTS['pid'] +
            pyparsing.Suppress(']')) + pyparsing.Optional(
                pyparsing.Suppress('<') + _PYPARSING_COMPONENTS['facility'] +
                pyparsing.Suppress('>')) +
        pyparsing.Optional(pyparsing.Suppress(':')) +
        _PYPARSING_COMPONENTS['body'] + pyparsing.lineEnd())

    # Comment line: yearless date, ':' and a body enclosed in '---' markers.
    _SYSLOG_COMMENT = (_PYPARSING_COMPONENTS['date'] +
                       pyparsing.Suppress(':') + pyparsing.Suppress('---') +
                       _PYPARSING_COMPONENTS['comment_body'] +
                       pyparsing.Suppress('---') + pyparsing.LineEnd())

    # Kernel line: yearless date, the literal reporter 'kernel', ':' and body
    # (there is no hostname component).
    _KERNEL_SYSLOG_LINE = (
        _PYPARSING_COMPONENTS['date'] +
        pyparsing.Literal('kernel').setResultsName('reporter') +
        pyparsing.Suppress(':') + _PYPARSING_COMPONENTS['body'] +
        pyparsing.lineEnd())

    # Structure name / grammar pairs, in the order they are listed here.
    LINE_STRUCTURES = [('chromeos_syslog_line', _CHROMEOS_SYSLOG_LINE),
                       ('kernel_syslog_line', _KERNEL_SYSLOG_LINE),
                       ('rsyslog_line', _RSYSLOG_LINE),
                       ('rsyslog_traditional_line', _RSYSLOG_TRADITIONAL_LINE),
                       ('syslog_comment', _SYSLOG_COMMENT)]

    # Structure names that ParseRecord accepts.
    _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

    def __init__(self):
        """Initializes the syslog parser and its year tracking state."""
        super(SyslogParser, self).__init__()
        # Maps a reporter name to the plugin that handles it; filled in by
        # EnablePlugins.
        self._plugin_by_reporter = {}
        # Year tracking state for yearless timestamps; 0 means "not set yet".
        self._year_use = 0
        self._maximum_year = 0
        self._last_month = 0

    def _UpdateYear(self, mediator, month):
        """Adjusts the year used for events from the last observed month.

        The year in use and the maximum year are requested from the mediator
        the first time they are needed. The year is incremented when the
        month moves backwards by more than one, unless the year in use
        already equals the maximum year.

        Args:
          mediator (ParserMediator): mediates interactions between parsers
              and other components, such as storage and dfvfs.
          month (int): month observed by the parser, where January is 1.
        """
        self._year_use = self._year_use or mediator.GetEstimatedYear()
        self._maximum_year = self._maximum_year or mediator.GetLatestYear()

        previous_month = self._last_month
        self._last_month = month
        if not previous_month:
            # First observed month: nothing to compare against yet.
            return

        # Some syslog daemons allow out-of-order sequences, so allow some
        # leeway to not cause Apr->May->Apr to cause the year to increment.
        # See http://bugzilla.adiscon.com/show_bug.cgi?id=527
        month_went_backwards = previous_month > month + 1
        if month_went_backwards and self._year_use != self._maximum_year:
            self._year_use += 1

    def EnablePlugins(self, plugin_includes):
        """Enables parser plugins and indexes them by reporter.

        Args:
          plugin_includes (list[str]): names of the plugins to enable, where
              None or an empty list represents all plugins. Note that the
              default plugin is handled separately.
        """
        super(SyslogParser, self).EnablePlugins(plugin_includes)

        # Rebuild the lookup table from scratch; when two plugins share a
        # reporter the one listed last wins.
        self._plugin_by_reporter = {
            plugin.REPORTER: plugin for plugin in self._plugins}

    def ParseRecord(self, parser_mediator, key, structure):
        """Produces an event for a matched syslog entry.

        The date and time is taken from the ISO 8601 string for the Chrome OS
        and rsyslog structures and assembled from the yearless date for all
        other structures. Entries whose reporter has a plugin are handed to
        that plugin; a generic event is produced when there is no plugin or
        the plugin rejects the entry.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): elements parsed from the file.

        Raises:
          ParseError: when the structure type is unknown.
        """
        if key not in self._SUPPORTED_KEYS:
            raise errors.ParseError(
                'Unable to parse record, unknown structure: {0:s}'.format(key))

        if key in ('chromeos_syslog_line', 'rsyslog_line'):
            # These structures carry a complete date and time string.
            iso8601_string = self._GetValueFromStructure(structure, 'datetime')
            date_time = dfdatetime_time_elements.TimeElementsInMicroseconds()

            try:
                date_time.CopyFromStringISO8601(iso8601_string)
            except ValueError:
                parser_mediator.ProduceExtractionWarning(
                    'invalid date time value: {0:s}'.format(iso8601_string))
                return

        else:
            # TODO: add support for fractional seconds.

            month_name = self._GetValueFromStructure(structure, 'month')
            try:
                month = timelib.MONTH_DICT.get(month_name.lower(), 0)
            except AttributeError:
                parser_mediator.ProduceExtractionWarning(
                    'invalid month value: {0!s}'.format(month_name))
                return

            # A month of 0 means the name was not found in the lookup table;
            # the year tracking is only updated for known months.
            if month:
                self._UpdateYear(parser_mediator, month)

            time_elements_tuple = (
                self._year_use,
                month,
                self._GetValueFromStructure(structure, 'day'),
                self._GetValueFromStructure(structure, 'hour'),
                self._GetValueFromStructure(structure, 'minute'),
                self._GetValueFromStructure(structure, 'second'))

            try:
                date_time = dfdatetime_time_elements.TimeElements(
                    time_elements_tuple=time_elements_tuple)
                date_time.is_local_time = True
            except ValueError:
                parser_mediator.ProduceExtractionWarning(
                    'invalid date time value: {0!s}'.format(
                        time_elements_tuple))
                return

        if key == 'syslog_comment':
            # Comment lines only have a body and are never passed to plugins.
            plugin = None
            event_data = SyslogCommentEventData()
            event_data.body = self._GetValueFromStructure(structure, 'body')
            # TODO: pass line number to offset or remove.
            event_data.offset = 0

        else:
            event_data = SyslogLineEventData()
            event_data.body = self._GetValueFromStructure(structure, 'body')
            event_data.hostname = self._GetValueFromStructure(
                structure, 'hostname')
            # TODO: pass line number to offset or remove.
            event_data.offset = 0
            event_data.pid = self._GetValueFromStructure(structure, 'pid')
            event_data.reporter = self._GetValueFromStructure(
                structure, 'reporter')
            event_data.severity = self._GetValueFromStructure(
                structure, 'severity')

            plugin = self._plugin_by_reporter.get(event_data.reporter)
            if plugin:
                attributes = {
                    'body': event_data.body,
                    'hostname': event_data.hostname,
                    'pid': event_data.pid,
                    'reporter': event_data.reporter,
                    'severity': event_data.severity
                }

                try:
                    # TODO: pass event_data instead of attributes.
                    plugin.Process(parser_mediator, date_time, attributes)

                except errors.WrongPlugin:
                    # Fall back to the generic event below.
                    plugin = None

        if plugin:
            # The plugin handled the entry.
            return

        event = time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_WRITTEN)
        parser_mediator.ProduceEventWithEventData(event, event_data)

    def VerifyStructure(self, parser_mediator, lines):
        """Checks whether the text starts like a supported syslog format.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          lines (str): one or more lines from the text file.

        Returns:
          bool: True if this is the correct parser, False otherwise.
        """
        verification_match = self._VERIFICATION_REGEX.match(lines)
        return verification_match is not None
Exemplo n.º 17
0
class BashHistoryParser(text_parser.PyparsingMultiLineTextParser):
  """Parses events from Bash history files.

  Only entries that are preceded by a "#<timestamp>" line are matched by the
  line grammar.
  """

  NAME = 'bash'

  DESCRIPTION = 'Parser for Bash history files'

  _ENCODING = 'utf-8'

  # A '#' (suppressed) followed by a number of 9 or 10 digits, which is cast
  # to an integer and stored under the results name 'timestamp'.
  _TIMESTAMP = pyparsing.Suppress('#') + pyparsing.Word(
      pyparsing.nums, min=9, max=10).setParseAction(
          text_parser.PyParseIntCast).setResultsName('timestamp')

  # Lazily matches the command text, possibly spanning several lines, up to
  # the end of the buffer or up to a newline followed by '#' and 10 digits.
  # NOTE(review): the lookahead requires 10 digits while _TIMESTAMP accepts 9
  # digits as well - confirm whether this difference is intended.
  _COMMAND = pyparsing.Regex(
      r'.*?(?=($|\n#\d{10}))', re.DOTALL).setResultsName('command')

  _LINE_GRAMMAR = _TIMESTAMP + _COMMAND + pyparsing.lineEnd()

  # Used by VerifyStructure: a line whose first character (after at most one
  # whitespace character) is not '#', followed by a timestamp that is not
  # directly followed by another '#' comment.
  _VERIFICATION_GRAMMAR = (
      pyparsing.Regex(r'^\s?[^#].*?$', re.MULTILINE) + _TIMESTAMP +
      pyparsing.NotAny(pyparsing.pythonStyleComment))

  # Structure name / grammar pairs; ParseRecord only accepts 'log_entry'.
  LINE_STRUCTURES = [('log_entry', _LINE_GRAMMAR)]

  def ParseRecord(self, parser_mediator, key, structure):
    """Turns a matched history entry into a Bash history event.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): elements parsed from the file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key != 'log_entry':
      message = 'Unable to parse record, unknown structure: {0:s}'.format(key)
      raise errors.ParseError(message)

    event_data = BashHistoryEventData()
    event_data.command = self._GetValueFromStructure(structure, 'command')

    # The number after the '#' is used as a POSIX timestamp.
    posix_timestamp = self._GetValueFromStructure(structure, 'timestamp')
    date_time = dfdatetime_posix_time.PosixTime(timestamp=posix_timestamp)

    parser_mediator.ProduceEventWithEventData(
        time_events.DateTimeValuesEvent(
            date_time, definitions.TIME_DESCRIPTION_MODIFICATION),
        event_data)

  # pylint: disable=unused-argument
  def VerifyStructure(self, parser_mediator, lines):
    """Checks whether the text looks like a bash history file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between
          parsers and other components, such as storage and dfvfs.
      lines (str): one or more lines from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    # A single match of the verification grammar anywhere in the text is
    # sufficient.
    for _ in self._VERIFICATION_GRAMMAR.scanString(lines, maxMatches=1):
      return True
    return False
Exemplo n.º 18
0
class SkyDriveLogParser(text_parser.PyparsingMultiLineTextParser):
    """Parses SkyDrive log files.

    Two structures are recognized: the header written when logging starts
    and regular, possibly multi-line, log lines.
    """

    NAME = u'skydrive_log'
    DESCRIPTION = u'Parser for OneDrive (or SkyDrive) log files.'

    _ENCODING = u'utf-8'

    # Common SDF (SkyDrive Format) structures.
    INTEGER_CAST = text_parser.PyParseIntCast
    HYPHEN = text_parser.PyparsingConstants.HYPHEN
    TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS
    TIME_MSEC = text_parser.PyparsingConstants.TIME_MSEC
    # At most three digits, cast to an integer.
    MSEC = pyparsing.Word(pyparsing.nums, max=3).setParseAction(INTEGER_CAST)
    # Separators are suppressed so they do not show up in the parse results.
    COMMA = pyparsing.Literal(u',').suppress()
    DOT = pyparsing.Literal(u'.').suppress()
    # A comma separated field whose value is discarded.
    IGNORE_FIELD = pyparsing.CharsNotIn(u',').suppress()

    # Header line timestamp (2013-07-25-160323.291): the timestamp format is
    # YYYY-MM-DD-hhmmss.msec.
    SDF_HEADER_TIMESTAMP = pyparsing.Group(
        text_parser.PyparsingConstants.DATE.setResultsName(u'date') + HYPHEN +
        TWO_DIGITS.setResultsName(u'hh') + TWO_DIGITS.setResultsName(u'mm') +
        TWO_DIGITS.setResultsName(u'ss') + DOT +
        MSEC.setResultsName(u'ms')).setResultsName(u'hdr_timestamp')

    # Line timestamp (07-25-13,16:06:31.820): the timestamp format is
    # MM-DD-YY,hh:mm:ss.msec.
    SDF_TIMESTAMP = (
        TWO_DIGITS.setResultsName(u'month') + HYPHEN +
        TWO_DIGITS.setResultsName(u'day') + HYPHEN +
        TWO_DIGITS.setResultsName(u'year_short') + COMMA +
        TIME_MSEC.setResultsName(u'time')).setResultsName(u'timestamp')

    # Header start.
    SDF_HEADER_START = (
        pyparsing.Literal(u'######').suppress() +
        pyparsing.Literal(u'Logging started.').setResultsName(u'log_start'))

    # Multiline entry end marker, matched from right to left.
    # The detail of a log line runs until the end of the text, the start of a
    # header or the timestamp of the next log line.
    SDF_ENTRY_END = pyparsing.StringEnd() | SDF_HEADER_START | SDF_TIMESTAMP

    # SkyDrive line pyparsing structure.
    # Comma separated fields after the timestamp: three ignored fields,
    # module, source_code, two ignored fields, log_level and the detail.
    SDF_LINE = (SDF_TIMESTAMP + COMMA + IGNORE_FIELD + COMMA + IGNORE_FIELD +
                COMMA + IGNORE_FIELD + COMMA +
                pyparsing.CharsNotIn(u',').setResultsName(u'module') + COMMA +
                pyparsing.CharsNotIn(u',').setResultsName(u'source_code') +
                COMMA + IGNORE_FIELD + COMMA + IGNORE_FIELD + COMMA +
                pyparsing.CharsNotIn(u',').setResultsName(u'log_level') +
                COMMA +
                pyparsing.SkipTo(SDF_ENTRY_END).setResultsName(u'detail') +
                pyparsing.ZeroOrMore(pyparsing.lineEnd()))

    # SkyDrive header pyparsing structure.
    # Header start, 'Version=' and version number, 'StartSystemTime:' and the
    # header timestamp, 'StartLocalTime:' and the remainder of the line.
    SDF_HEADER = (
        SDF_HEADER_START +
        pyparsing.Literal(u'Version=').setResultsName(u'version_string') +
        pyparsing.Word(pyparsing.nums + u'.').setResultsName(u'version_number')
        + pyparsing.Literal(u'StartSystemTime:').suppress() +
        SDF_HEADER_TIMESTAMP + pyparsing.Literal(
            u'StartLocalTime:').setResultsName(u'local_time_string') +
        pyparsing.SkipTo(pyparsing.lineEnd()).setResultsName(u'details') +
        pyparsing.lineEnd())

    # Define the available log line structures.
    LINE_STRUCTURES = [(u'logline', SDF_LINE), (u'header', SDF_HEADER)]

    def __init__(self):
        """Initializes the SkyDrive log parser."""
        super(SkyDriveLogParser, self).__init__()
        # Timestamps are not interpreted in the local time zone by default.
        self.use_local_zone = False

    def _GetTimestampFromHeader(self, structure):
        """Converts a parsed header timestamp into a timestamp value.

        The following is an example of the timestamp structure expected
        [[2013, 7, 25], 16, 3, 23, 291]: DATE (year, month, day) is the first
        list element, then hours, minutes, seconds and milliseconds follow.
        Missing time values default to 0.

        Args:
          structure: The parsed structure, which should be a timestamp.

        Returns:
          The value returned by timelib.Timestamp.FromTimeParts for the
          parsed date and time.
        """
        year, month, day = structure.date
        hours, minutes, seconds = (
            structure.get(name, 0) for name in (u'hh', u'mm', u'ss'))
        # The header stores milliseconds; FromTimeParts takes microseconds.
        microseconds = structure.get(u'ms', 0) * 1000

        return timelib.Timestamp.FromTimeParts(
            year, month, day, hours, minutes, seconds,
            microseconds=microseconds)

    def _GetTimestampFromLine(self, structure):
        """Converts a parsed log line timestamp into a timestamp value.

        The following is an example of the timestamp structure expected
        [7, 25, 13, [16, 3, 24], 649]: month, day, year, a list with three
        elements (hours, minutes, seconds) and finally milliseconds.

        Args:
          structure: The parsed structure.

        Returns:
          The value returned by timelib.Timestamp.FromTimeParts, or 0 when
          the year is negative or the month or day is missing or zero.
        """
        time_structure = structure.time
        hours, minutes, seconds = time_structure[0]
        # The line stores milliseconds; FromTimeParts takes microseconds.
        microseconds = time_structure[1] * 1000

        # TODO: Verify if timestamps are locale dependent.
        short_year = structure.get(u'year_short', 0)
        month = structure.get(u'month', 0)
        day = structure.get(u'day', 0)
        if short_year < 0 or not (month and day):
            return 0

        # The log only stores the last two digits of the year.
        return timelib.Timestamp.FromTimeParts(
            short_year + 2000, month, day, hours, minutes, seconds,
            microseconds=microseconds)

    def _ParseHeader(self, structure):
        """Parse header lines and store appropriate attributes.

        [u'Logging started.', u'Version=', u'17.0.2011.0627',
        [2013, 7, 25], 16, 3, 23, 291, u'StartLocalTime', u'<details>']

        Args:
          structure: A pyparsing.ParseResults object from an header line in
                     the log file.

        Returns:
          An event object (instance of SkyDriveLogEvent) or None when no
          timestamp could be determined.
        """
        header_timestamp = structure.hdr_timestamp
        timestamp = self._GetTimestampFromHeader(header_timestamp)
        if not timestamp:
            # The parsed timestamp is a pyparsing result, not an integer, so
            # it is formatted with !s; a 'd' format specification would raise
            # here instead of logging.
            logging.debug(u'SkyDriveLog invalid timestamp {0!s}'.format(
                header_timestamp))
            return None

        detail = u'{0:s} {1:s} {2:s} {3:s} {4:s}'.format(
            structure.log_start, structure.version_string,
            structure.version_number, structure.local_time_string,
            structure.details)
        return SkyDriveLogEvent(timestamp, detail)

    def _ParseLine(self, structure):
        """Parse a logline and store appropriate attributes.

        Args:
          structure: A pyparsing.ParseResults object from a line in the log
                     file.

        Returns:
          An event object (instance of SkyDriveLogEvent) or None when no
          timestamp could be determined.
        """
        line_timestamp = structure.timestamp
        timestamp = self._GetTimestampFromLine(line_timestamp)
        if not timestamp:
            # The parsed timestamp is a pyparsing result, not a string, so it
            # is formatted with !s; an 's' format specification would raise
            # here instead of logging.
            logging.debug(u'SkyDriveLog invalid timestamp {0!s}'.format(
                line_timestamp))
            return None

        # Replace newlines with spaces in structure.detail to preserve output.
        detail = structure.detail.replace(u'\n', u' ')
        return SkyDriveLogEvent(timestamp,
                                detail,
                                module=structure.module,
                                source_code=structure.source_code,
                                log_level=structure.log_level)

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a record structure and produces an event if applicable.

        Unknown structure names are logged as a warning and otherwise
        ignored.

        Args:
          parser_mediator: A parser mediator object (instance of
                           ParserMediator).
          key: An identification string indicating the name of the parsed
               structure.
          structure: A pyparsing.ParseResults object from a line in the
                     log file.
        """
        record_parsers = {
            u'logline': self._ParseLine,
            u'header': self._ParseHeader}

        record_parser = record_parsers.get(key)
        if record_parser is None:
            logging.warning(
                u'Unable to parse record, unknown structure: {0:s}'.format(
                    key))
            return

        event_object = record_parser(structure)
        if event_object:
            parser_mediator.ProduceEvent(event_object)

    def VerifyStructure(self, parser_mediator, line):
        """Verify that this file is a SkyDrive log file.

        The line must match the header grammar and the header timestamp must
        convert into a non-zero timestamp value.

        Args:
          parser_mediator: A parser mediator object (instance of
                           ParserMediator).
          line: A single line from the text file.

        Returns:
          True if this is the correct parser, False otherwise.
        """
        try:
            parsed_structure = self.SDF_HEADER.parseString(line)
        except pyparsing.ParseException:
            logging.debug(u'Not a SkyDrive log file')
            return False

        header_timestamp = parsed_structure.hdr_timestamp
        timestamp = self._GetTimestampFromHeader(header_timestamp)
        if not timestamp:
            # Report the header timestamp that was actually checked: the
            # header grammar stores it as 'hdr_timestamp' and defines no
            # 'timestamp' result. It is a pyparsing result, hence !s.
            logging.debug(
                u'Not a SkyDrive log file, invalid timestamp {0!s}'.format(
                    header_timestamp))
            return False

        return True
Exemplo n.º 19
0
class SetupapiLogParser(text_parser.PyparsingSingleLineTextParser):
    """Parses events from Windows Setupapi log files.

    A log section is spread over several lines (section header, section
    start, body lines, section end and exit status). The parser therefore
    keeps the last seen entry type and end time between ParseRecord calls.
    """

    NAME = 'setupapi'
    DATA_FORMAT = 'Windows SetupAPI log file'

    _ENCODING = 'utf-8'

    _SLASH = pyparsing.Literal('/').suppress()

    _FOUR_DIGITS = text_parser.PyparsingConstants.FOUR_DIGITS
    _THREE_DIGITS = text_parser.PyparsingConstants.THREE_DIGITS
    _TWO_DIGITS = text_parser.PyparsingConstants.TWO_DIGITS

    # Date and time with a 3 digit fraction, for example:
    # 2019/10/22 18:08:42.486
    # The fraction separator can be either "." or ",".
    _SETUPAPI_DATE_TIME = pyparsing.Group(
        _FOUR_DIGITS + _SLASH + _TWO_DIGITS + _SLASH + _TWO_DIGITS +
        _TWO_DIGITS + pyparsing.Suppress(':') + _TWO_DIGITS +
        pyparsing.Suppress(':') + _TWO_DIGITS +
        pyparsing.Word('.,', exact=1).suppress() + _THREE_DIGITS)

    # Disable pylint due to long URLs for documenting structures.
    # pylint: disable=line-too-long

    # See https://docs.microsoft.com/en-us/windows-hardware/drivers/install/format-of-a-text-log-header
    _LOG_HEADER_START = (pyparsing.Literal('[Device Install Log]') +
                         pyparsing.lineEnd())

    # See https://docs.microsoft.com/en-us/windows-hardware/drivers/install/format-of-a-text-log-header
    _LOG_HEADER_END = (pyparsing.Literal('[BeginLog]') + pyparsing.lineEnd())

    # See https://docs.microsoft.com/en-us/windows-hardware/drivers/install/format-of-a-text-log-section-header
    _SECTION_HEADER = (pyparsing.Literal('>>>  [').suppress() +
                       pyparsing.CharsNotIn(']').setResultsName('entry_type') +
                       pyparsing.Literal(']') + pyparsing.lineEnd())

    # See https://docs.microsoft.com/en-us/windows-hardware/drivers/install/format-of-a-text-log-section-header
    _SECTION_HEADER_START = (
        pyparsing.Literal('>>>  Section start').suppress() +
        _SETUPAPI_DATE_TIME.setResultsName('start_time') + pyparsing.lineEnd())

    # See https://docs.microsoft.com/en-us/windows-hardware/drivers/install/format-of-a-text-log-section-footer
    _SECTION_END = (pyparsing.Literal('<<<  Section end ').suppress() +
                    _SETUPAPI_DATE_TIME.setResultsName('end_time') +
                    pyparsing.lineEnd())

    # See https://docs.microsoft.com/en-us/windows-hardware/drivers/install/format-of-a-text-log-section-footer
    _SECTION_END_EXIT_STATUS = (
        pyparsing.Literal('<<<  [Exit status: ').suppress() +
        pyparsing.CharsNotIn(']').setResultsName('exit_status') +
        pyparsing.Literal(']') + pyparsing.lineEnd())

    # See https://docs.microsoft.com/en-us/windows-hardware/drivers/install/format-of-a-text-log-section-body
    _SECTION_BODY_LINE = (pyparsing.stringStart + pyparsing.MatchFirst([
        pyparsing.Literal('!!!  '),
        pyparsing.Literal('!    '),
        pyparsing.Literal('     ')
    ]) + pyparsing.restOfLine).leaveWhitespace()

    # See https://docs.microsoft.com/en-us/windows-hardware/drivers/install/format-of-log-entries-that-are-not-part-of-a-text-log-section
    _NON_SECTION_LINE = (pyparsing.stringStart + pyparsing.MatchFirst([
        pyparsing.Literal('   . '),
        pyparsing.Literal('!!!  '),
        pyparsing.Literal('!    '),
        pyparsing.Literal('     ')
    ]) + pyparsing.restOfLine).leaveWhitespace()

    # These lines do not appear to be documented in the Microsoft documentation.
    _BOOT_SESSION_LINE = (pyparsing.Literal('[Boot Session:') +
                          _SETUPAPI_DATE_TIME + pyparsing.Literal(']'))

    # pylint: enable=line-too-long

    LINE_STRUCTURES = [('ignorable_line', _BOOT_SESSION_LINE),
                       ('ignorable_line', _LOG_HEADER_END),
                       ('ignorable_line', _LOG_HEADER_START),
                       ('ignorable_line', _NON_SECTION_LINE),
                       ('ignorable_line', _SECTION_BODY_LINE),
                       ('section_end', _SECTION_END),
                       ('section_end_exit_status', _SECTION_END_EXIT_STATUS),
                       ('section_header', _SECTION_HEADER),
                       ('section_start', _SECTION_HEADER_START)]

    def __init__(self):
        """Initializes a setupapi parser."""
        super(SetupapiLogParser, self).__init__()
        # State carried from one line of a section to the next.
        self._last_end_time = None
        self._last_entry_type = None

    def _GetTimeElements(self, time_structure):
        """Builds time elements from a setupapi time_stamp field.

        Args:
          time_structure (pyparsing.ParseResults): structure of tokens
              derived from a setupapi time_stamp field.

        Returns:
          dfdatetime.TimeElements: date and time extracted from the value or
              None if the structure does not represent a valid date and time
              value.
        """
        try:
            date_time = dfdatetime_time_elements.TimeElementsInMilliseconds(
                time_elements_tuple=time_structure)
        except ValueError:
            return None

        # Setupapi logs store date and time values in local time.
        date_time.is_local_time = True
        return date_time

    def ParseRecord(self, parser_mediator, key, structure):
        """Parses a log record structure and produces events.

        A start event is produced for every "section_start" record. An end
        event is produced for a "section_end_exit_status" record, provided
        the preceding "section_end" record supplied a valid end time.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          key (str): identifier of the structure of tokens.
          structure (pyparsing.ParseResults): structure of tokens derived
              from a log entry.

        Raises:
          ParseError: when the structure type is unknown.
        """
        if key == 'ignorable_line':
            return

        if key == 'section_header':
            self._last_entry_type = self._GetValueFromStructure(
                structure, 'entry_type')
            return

        if key == 'section_start':
            time_structure = self._GetValueFromStructure(
                structure, 'start_time')
            start_time = self._GetTimeElements(time_structure)
            if not start_time:
                parser_mediator.ProduceExtractionWarning(
                    'invalid date time value: {0!s}'.format(time_structure))
                return

            event_data = SetupapiLogEventData()
            event_data.entry_type = self._last_entry_type

            event = time_events.DateTimeValuesEvent(
                start_time,
                definitions.TIME_DESCRIPTION_START,
                time_zone=parser_mediator.timezone)

            # Create event for the start of the setupapi section
            parser_mediator.ProduceEventWithEventData(event, event_data)
            return

        if key == 'section_end':
            time_structure = self._GetValueFromStructure(structure, 'end_time')
            end_time = self._GetTimeElements(time_structure)
            if not end_time:
                parser_mediator.ProduceExtractionWarning(
                    'invalid date time value: {0!s}'.format(time_structure))
            # Store last end time so that an event with the data from the
            # following exit status section can be created.
            self._last_end_time = end_time
            return

        if key == 'section_end_exit_status':
            exit_status = self._GetValueFromStructure(structure, 'exit_status')
            if self._last_end_time:
                event_data = SetupapiLogEventData()
                event_data.entry_type = self._last_entry_type
                event_data.exit_status = exit_status
                event = time_events.DateTimeValuesEvent(
                    self._last_end_time,
                    definitions.TIME_DESCRIPTION_END,
                    time_zone=parser_mediator.timezone)
                parser_mediator.ProduceEventWithEventData(event, event_data)

            # Reset entry type and end time in case a line is missing. This
            # is done whether or not an end time was available, so that a
            # known structure without an end time is not reported as an
            # unknown structure and stale state does not leak into the next
            # section.
            self._last_entry_type = None
            self._last_end_time = None
            return

        raise errors.ParseError(
            'Unable to parse record, unknown structure: {0:s}'.format(key))

    def VerifyStructure(self, parser_mediator, line):
        """Verify that this file is a Windows Setupapi log file.

        Args:
          parser_mediator (ParserMediator): mediates interactions between
              parsers and other components, such as storage and dfvfs.
          line (str): single line from the text file.

        Returns:
          bool: True if this is the correct parser, False otherwise.
        """
        try:
            self._LOG_HEADER_START.parseString(line)
        except pyparsing.ParseException as exception:
            logger.debug(
                'Not a Windows Setupapi log file: {0!s}'.format(exception))
            return False

        # Reset stored values for parsing a new file.
        self._last_end_time = None
        self._last_entry_type = None
        return True
Exemplo n.º 20
0
    def _parse_timing_summary_tables(time_rpt: str):
        """
        Return tables from a Vivado timing summary report.

        This currently only handles basic tables such as "Design Timing
        Summary" and "Clock Summary". The more complex data in "Timing Details"
        such as worst paths, etc. isn't parsed.

        Args:
            time_rpt: Text of the timing summary report.

        Returns:
            Dictionary mapping each matched section title to the text of the
            table that follows it.
        """

        # Make newlines widely significant but be careful not to affect
        # others: the default whitespace is a class-wide pyparsing setting, so
        # it is restored in a ``finally`` even if building the grammar raises.
        saved_whitespace = pp.ParserElement.DEFAULT_WHITE_CHARS
        pp.ParserElement.setDefaultWhitespaceChars(" \t")
        try:
            # Extract table title ("Clock Summary") from a section heading
            # like:
            #
            # --------------------------------------------------------------
            # | Clock Summary
            # | -------------
            # --------------------------------------------------------------
            sec_hline = pp.lineStart() + pp.Word("-") + pp.lineEnd()
            sec_row_start = pp.lineStart() + pp.Literal("|").suppress()
            sec_title = (sec_row_start + pp.SkipTo(pp.lineEnd())("title") +
                         pp.lineEnd().suppress())
            sec_title_uline = sec_row_start + pp.Suppress(
                pp.Word("-") + pp.lineEnd())

            section_head = (sec_hline + sec_title + sec_title_uline +
                            sec_hline)

            blank_line = pp.Suppress(pp.lineEnd() * 2)

            # Tables are headings followed by lines and then data.
            #
            # Clock  Waveform(ns)         Period(ns)      Frequency(MHz)
            # -----  ------------         ----------      --------------
            # clk    {0.000 2.500}        5.000           200.000

            # Match two or more groups of dashes to avoid matching long
            # single horizontal lines used elsewhere. Normally the spaces
            # between the groups of dashes would be consumed, so get them
            # back with originalTextFor(). It would be safer to anchor this
            # to the start of the line, but "Design Timing Summary" and
            # perhaps others indent the table for some reason

            table_hline = pp.originalTextFor(
                pp.Word("-") * (2, ) + pp.lineEnd())

            # Get any header rows above the horizontal lines
            table_head = pp.SkipTo(table_hline, failOn=blank_line)

            # Get everything from the horizontal lines to an empty line
            table_body = pp.SkipTo(blank_line)

            # The adjacent argument shouldn't be required but it doesn't
            # match anything without it. It seems like the newline at the end
            # of the heading row may not be getting included somehow.

            table = pp.Combine(table_head + table_hline + table_body,
                               adjacent=False)("table")

            section = section_head + pp.lineEnd().suppress() + table
        finally:
            # Restore whitespace characters
            pp.ParserElement.setDefaultWhitespaceChars(saved_whitespace)

        table_dict = {
            x["title"]: x["table"]
            for x in section.searchString(time_rpt)
        }

        return table_dict
Exemplo n.º 21
0
class ApacheAccessParser(text_parser.PyparsingSingleLineTextParser):
  """Apache access log (access.log) file parser.

  Three line layouts are recognized: the common log format, the combined log
  format and the "vhost_combined" format.
  """

  NAME = 'apache_access'
  DATA_FORMAT = 'Apache access log (access.log) file'

  MAX_LINE_LENGTH = 2048

  # Date format [18/Sep/2011:19:18:28 -0400]
  _DATE_TIME = pyparsing.Group(
      pyparsing.Suppress('[') +
      text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('day') +
      pyparsing.Suppress('/') +
      text_parser.PyparsingConstants.THREE_LETTERS.setResultsName('month') +
      pyparsing.Suppress('/') +
      text_parser.PyparsingConstants.FOUR_DIGITS.setResultsName('year') +
      pyparsing.Suppress(':') +
      text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('hours') +
      pyparsing.Suppress(':') +
      text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('minutes') +
      pyparsing.Suppress(':') +
      text_parser.PyparsingConstants.TWO_DIGITS.setResultsName('seconds') +
      pyparsing.Combine(
          pyparsing.oneOf(['-', '+']) +
          pyparsing.Word(
              pyparsing.nums, exact=4)).setResultsName('time_offset') +
      pyparsing.Suppress(']')).setResultsName('date_time')

  # The request is everything between the opening quote and the next
  # quote that is followed by a space.
  _HTTP_REQUEST = (
      pyparsing.Suppress('"') +
      pyparsing.SkipTo('" ').setResultsName('http_request') +
      pyparsing.Suppress('"'))

  _PORT_NUMBER = text_parser.PyparsingConstants.INTEGER.setResultsName(
      'port_number')

  _REMOTE_NAME = (
      pyparsing.Word(pyparsing.alphanums) |
      pyparsing.Literal('-')).setResultsName('remote_name')

  _RESPONSE_BYTES = (
      pyparsing.Literal('-') |
      text_parser.PyparsingConstants.INTEGER).setResultsName('response_bytes')

  _REFERER = (
      pyparsing.Suppress('"') +
      pyparsing.SkipTo('" ').setResultsName('referer') +
      pyparsing.Suppress('"'))

  _SERVER_NAME = (
      pyparsing.Word(pyparsing.alphanums + '-' + '.').setResultsName(
          'server_name'))

  _USER_AGENT = (
      pyparsing.Suppress('"') +
      pyparsing.SkipTo('"').setResultsName('user_agent') +
      pyparsing.Suppress('"'))

  _USER_NAME = (
      pyparsing.Word(pyparsing.alphanums) |
      pyparsing.Literal('-')).setResultsName('user_name')

  # Defined in https://httpd.apache.org/docs/2.4/logs.html
  # format: "%h %l %u %t \"%r\" %>s %b"
  _COMMON_LOG_FORMAT_LINE = (
      text_parser.PyparsingConstants.IP_ADDRESS.setResultsName('ip_address') +
      _REMOTE_NAME +
      _USER_NAME +
      _DATE_TIME +
      _HTTP_REQUEST +
      text_parser.PyparsingConstants.INTEGER.setResultsName('response_code') +
      _RESPONSE_BYTES +
      pyparsing.lineEnd())

  # Defined in https://httpd.apache.org/docs/2.4/logs.html
  # format: "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\""
  _COMBINED_LOG_FORMAT_LINE = (
      text_parser.PyparsingConstants.IP_ADDRESS.setResultsName('ip_address') +
      _REMOTE_NAME +
      _USER_NAME +
      _DATE_TIME +
      _HTTP_REQUEST +
      text_parser.PyparsingConstants.INTEGER.setResultsName('response_code') +
      _RESPONSE_BYTES +
      _REFERER +
      _USER_AGENT +
      pyparsing.lineEnd())

  # "vhost_combined" format as used by Debian and related distributions.
  # "%v:%p %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\""
  _VHOST_COMBINED_LOG_FORMAT = (
      _SERVER_NAME +
      pyparsing.Suppress(':') +
      _PORT_NUMBER +
      text_parser.PyparsingConstants.IP_ADDRESS.setResultsName('ip_address') +
      _REMOTE_NAME +
      _USER_NAME +
      _DATE_TIME +
      _HTTP_REQUEST +
      text_parser.PyparsingConstants.INTEGER.setResultsName('response_code') +
      _RESPONSE_BYTES +
      _REFERER +
      _USER_AGENT +
      pyparsing.lineEnd()
  )

  LINE_STRUCTURES = [
      ('combined_log_format', _COMBINED_LOG_FORMAT_LINE),
      ('common_log_format', _COMMON_LOG_FORMAT_LINE),
      ('vhost_combined_log_format', _VHOST_COMBINED_LOG_FORMAT)]

  _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

  def _GetDateTime(self, structure):
    """Retrieves the date and time from a date and time values structure.

    The date and time values in Apache access log files are formatted as:
    "[18/Sep/2011:19:18:28 -0400]".

    Args:
      structure (pyparsing.ParseResults): structure of tokens derived from a
          line of a text file.

    Returns:
      dfdatetime.DateTimeValues: date and time.

    Raises:
      ValueError: if the structure cannot be converted into a date time string.
    """
    year = self._GetValueFromStructure(structure, 'year')
    month = self._GetValueFromStructure(structure, 'month')

    try:
      # Month names that are not in the lookup table map to 0.
      month = self._MONTH_DICT.get(month.lower(), 0)
    except AttributeError as exception:
      raise ValueError('unable to parse month with error: {0!s}.'.format(
          exception))

    day_of_month = self._GetValueFromStructure(structure, 'day')
    hours = self._GetValueFromStructure(structure, 'hours')
    minutes = self._GetValueFromStructure(structure, 'minutes')
    seconds = self._GetValueFromStructure(structure, 'seconds')
    time_offset = self._GetValueFromStructure(structure, 'time_offset')

    try:
      # The offset is formatted as a sign followed by HHMM; convert it to a
      # signed number of minutes.
      time_zone_offset = int(time_offset[1:3], 10) * 60
      time_zone_offset += int(time_offset[3:5], 10)
      if time_offset[0] == '-':
        time_zone_offset *= -1

    except (TypeError, ValueError) as exception:
      raise ValueError(
          'unable to parse time zone offset with error: {0!s}.'.format(
              exception))

    time_elements_tuple = (year, month, day_of_month, hours, minutes, seconds)

    return dfdatetime_time_elements.TimeElements(
        time_elements_tuple=time_elements_tuple,
        time_zone_offset=time_zone_offset)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a matching entry.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): elements parsed from the file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in self._SUPPORTED_KEYS:
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    # This is the grouped date and time tokens, not a string.
    date_time_structure = self._GetValueFromStructure(structure, 'date_time')

    try:
      date_time = self._GetDateTime(date_time_structure)
    except ValueError as exception:
      parser_mediator.ProduceExtractionWarning(
          'unable to parse date time value: {0!s} with error: {1!s}'.format(
              date_time_structure, exception))
      return

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_RECORDED)

    event_data = ApacheAccessEventData()
    event_data.ip_address = self._GetValueFromStructure(structure, 'ip_address')
    event_data.remote_name = self._GetValueFromStructure(
        structure, 'remote_name')
    event_data.user_name = self._GetValueFromStructure(structure, 'user_name')
    event_data.http_request = self._GetValueFromStructure(
        structure, 'http_request')
    event_data.http_response_code = self._GetValueFromStructure(
        structure, 'response_code')
    event_data.http_response_bytes = self._GetValueFromStructure(
        structure, 'response_bytes')

    # Referer and user agent only exist in the combined layouts.
    if key in ('combined_log_format', 'vhost_combined_log_format'):
      event_data.http_request_referer = self._GetValueFromStructure(
          structure, 'referer')
      event_data.http_request_user_agent = self._GetValueFromStructure(
          structure, 'user_agent')

    # Server name and port only exist in the vhost layout.
    if key == 'vhost_combined_log_format':
      event_data.server_name = self._GetValueFromStructure(
          structure, 'server_name')
      event_data.port_number = self._GetValueFromStructure(
          structure, 'port_number')

    parser_mediator.ProduceEventWithEventData(event, event_data)

  # pylint: disable=unused-argument
  def VerifyStructure(self, parser_mediator, line):
    """Verifies that this is an apache access log file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
      line (str): line from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    # Stop at the first line structure that matches instead of evaluating
    # every grammar and taking the maximum of the results.
    return any(parser.matches(line) for _, parser in self.LINE_STRUCTURES)
Exemplo n.º 22
0
class GoogleLogParser(text_parser.PyparsingMultiLineTextParser):
  """A parser for Google log formatted files.

  See https://google.io/docs/python/guides/logging

  Log lines carry a month and day but no year. The year is taken from the
  log file greeting, or from the parser mediator, and is advanced when the
  observed month moves backwards.
  """
  NAME = 'googlelog'

  DESCRIPTION = 'Parser for handling Google log formatted files.'

  # PyParsing components used to construct grammars for parsing lines.
  _PYPARSING_COMPONENTS = {
      'priority': (
          pyparsing.oneOf(['I', 'W', 'E', 'F']).setResultsName('priority')),
      'year': text_parser.PyparsingConstants.FOUR_DIGITS.setResultsName(
          'year'),
      'month_number': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          'month_number'),
      'day': text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName(
          'day'),
      'hour': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          'hour'),
      'minute': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          'minute'),
      'second': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
          'second'),
      'microsecond': pyparsing.Word(pyparsing.nums, exact=6).setParseAction(
          text_parser.PyParseIntCast).setResultsName('microsecond'),
      'thread_identifier': pyparsing.Word(pyparsing.nums).setResultsName(
          'thread_identifier'),
      'file_name':
          (pyparsing.Word(pyparsing.printables.replace(':', '')).setResultsName(
              'file_name')),
      'line_number': (
          pyparsing.Word(pyparsing.nums).setResultsName('line_number')),
      # The message runs up to the end of the input or up to a newline that
      # is followed by a priority letter and four digits.
      'message': (
          pyparsing.Regex('.*?(?=($|\n[IWEF][0-9]{4}))', re.DOTALL).
          setResultsName('message'))}

  # Note that the fraction of a second is optional.
  _PYPARSING_COMPONENTS['date'] = (
      _PYPARSING_COMPONENTS['month_number'] +
      _PYPARSING_COMPONENTS['day'] +
      _PYPARSING_COMPONENTS['hour'] + pyparsing.Suppress(':') +
      _PYPARSING_COMPONENTS['minute'] + pyparsing.Suppress(':') +
      _PYPARSING_COMPONENTS['second'] + pyparsing.Optional(
          pyparsing.Suppress('.') +
          _PYPARSING_COMPONENTS['microsecond']))

  # Grammar for individual log event lines.
  _LOG_LINE = (
      _PYPARSING_COMPONENTS['priority'] + _PYPARSING_COMPONENTS['date'] +
      _PYPARSING_COMPONENTS['thread_identifier'] +
      _PYPARSING_COMPONENTS['file_name'] + pyparsing.Suppress(':') +
      _PYPARSING_COMPONENTS['line_number'] + pyparsing.Suppress('] ') +
      _PYPARSING_COMPONENTS['message'] + pyparsing.lineEnd())

  # Grammar for the log file greeting.
  _GREETING = (
      _PYPARSING_COMPONENTS['year'] + pyparsing.Suppress('/') +
      _PYPARSING_COMPONENTS['month_number'] + pyparsing.Suppress('/') +
      _PYPARSING_COMPONENTS['day'] + _PYPARSING_COMPONENTS['hour'] +
      pyparsing.Suppress(':') + _PYPARSING_COMPONENTS['minute'] +
      pyparsing.Suppress(':') + _PYPARSING_COMPONENTS['second'] +
      pyparsing.Regex('.*?(?=($|\n[IWEF][0-9]{4}))', re.DOTALL) +
      pyparsing.lineEnd())

  _GREETING_START = 'Log file created at: '

  # Our initial buffer length is the length of the string we verify with.
  _INITIAL_BUFFER_SIZE = len(_GREETING_START)

  # Once we're sure we're reading a valid file, we'll use a larger read buffer.
  _RUNNING_BUFFER_SIZE = 5120

  # Order is important here, as the structures are checked against each line
  # sequentially, so we put the most common first, and the most expensive
  # last.
  LINE_STRUCTURES = [
      ('log_entry', _LOG_LINE),
      ('greeting_start', pyparsing.Literal(_GREETING_START)),
      ('greeting', _GREETING)]

  _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

  def __init__(self):
    """Initializes a Google log formatted file parser."""
    super(GoogleLogParser, self).__init__()

    # Set the size of the file we need to read to verify it.
    self._buffer_size = self._INITIAL_BUFFER_SIZE
    self._maximum_year = 0
    # The year to use for events. The initial year is stored in the log file
    # greeting.
    self._year = 0
    # The month the last observed event occurred.
    self._last_month = 0

  def _UpdateYear(self, mediator, month):
    """Updates the year to use for events, based on last observed month.

    Args:
      mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      month (int): month observed by the parser, where January is 1.
    """
    if not self._year:
      self._year = mediator.GetEstimatedYear()
    if not self._maximum_year:
      self._maximum_year = mediator.GetLatestYear()

    if not self._last_month:
      self._last_month = month
      return

    # TODO: Check whether out of order events are possible
    # A month that is more than one month earlier than the previous one is
    # treated as a roll over into the next year, capped at the maximum year.
    if self._last_month > (month + 1):
      if self._year != self._maximum_year:
        self._year += 1
    self._last_month = month

  def _ReadGreeting(self, structure):
    """Extract useful information from the logfile greeting.

    Args:
      structure (pyparsing.ParseResults): elements parsed from the file.
    """
    self._year = self._GetValueFromStructure(structure, 'year')
    self._last_month = self._GetValueFromStructure(structure, 'month_number')

  def _ParseLine(self, parser_mediator, structure):
    """Process a single log line into a GoogleLogEvent.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      structure (pyparsing.ParseResults): elements parsed from the file.
    """
    month = self._GetValueFromStructure(structure, 'month_number')

    if month != 0:
      self._UpdateYear(parser_mediator, month)

    day = self._GetValueFromStructure(structure, 'day')
    hours = self._GetValueFromStructure(structure, 'hour')
    minutes = self._GetValueFromStructure(structure, 'minute')
    seconds = self._GetValueFromStructure(structure, 'second')
    # The fraction of a second is optional in the grammar; fall back to 0 so
    # that a missing value is never passed on in the time elements tuple.
    microseconds = self._GetValueFromStructure(structure, 'microsecond') or 0

    time_elements_tuple = (
        self._year, month, day, hours, minutes, seconds, microseconds)

    try:
      date_time = dfdatetime_time_elements.TimeElementsInMicroseconds(
          time_elements_tuple=time_elements_tuple)
      date_time.is_local_time = True
    except ValueError:
      parser_mediator.ProduceExtractionWarning(
          'invalid date time value: {0!s}'.format(time_elements_tuple))
      return

    event_data = GoogleLogEventData()
    event_data.priority = self._GetValueFromStructure(structure, 'priority')
    event_data.thread_identifier = self._GetValueFromStructure(
        structure, 'thread_identifier')
    event_data.file_name = self._GetValueFromStructure(structure, 'file_name')
    event_data.line_number = self._GetValueFromStructure(
        structure, 'line_number')
    event_data.message = self._GetValueFromStructure(structure, 'message')

    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_WRITTEN)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  def ParseRecord(self, parser_mediator, key, structure):
    """Parses a matching entry.

    Args:
      parser_mediator (ParserMediator): mediates interactions between parsers
          and other components, such as storage and dfvfs.
      key (str): name of the parsed structure.
      structure (pyparsing.ParseResults): elements parsed from the file.

    Raises:
      ParseError: when the structure type is unknown.
    """
    if key not in self._SUPPORTED_KEYS:
      raise errors.ParseError(
          'Unable to parse record, unknown structure: {0:s}'.format(key))

    # The "greeting_start" structure is supported but carries no data, so it
    # is intentionally not handled below.
    if key == 'greeting':
      self._ReadGreeting(structure)

    elif key == 'log_entry':
      self._ParseLine(parser_mediator, structure)

  def VerifyStructure(self, parser_mediator, lines):
    """Verifies that this is a google log-formatted file.

    Args:
      parser_mediator (ParserMediator): mediates interactions between
          parsers and other components, such as storage and dfvfs.
      lines (str): one or more lines from the text file.

    Returns:
      bool: True if this is the correct parser, False otherwise.
    """
    if not lines.startswith(self._GREETING_START):
      return False

    # Now that we know this is a valid log, expand the read buffer to the
    # maximum size we expect a log event to be (which is quite large).
    self._buffer_size = self._RUNNING_BUFFER_SIZE
    self._year = parser_mediator.year
    return True
    def dockerfile_instruction_grammar(self):
        """Build the pyparsing grammar used to check one Dockerfile line.

        A line is either a comment or an optional instruction, followed by the
        end of the line and the end of the string. An instruction is an
        upper-case name, optional ``--name=`` options each followed by one
        argument, and the arguments in bracketed ("table") or bare ("list")
        form. A backslash followed by the end of the line is ignored.

        The parse actions record the current instruction on ``self``
        (``currentInstructionName`` and ``currentInstruction``) and raise
        ParseFatalException when a check fails.

        Note that this calls ``ParserElement.setDefaultWhitespaceChars`` with
        space and tab and does not restore the previous value.

        Returns:
            The pyparsing expression for a single line.
        """

        # ---- Fail action: error template with line / column / instruction

        def report_failure(text, position, expression, failure):
            """Raise the main error template (202) for a failed line."""
            message = DOCKERFILE_ERROR[202].format(
                ligne=self.line_counter,
                colonne=failure.loc,
                inst=self.currentInstructionName,
                erreur=failure.msg)
            raise ParseFatalException(message)

        # ---- Parse actions (basic verification)

        def validate_instruction(text, position, tokens):
            """Run the validator on the tokens of a complete instruction."""
            if self.validator.validate_instruction(tokens):
                return
            raise ParseFatalException(
                self.validator.get_errors(), loc=position)

        def remember_instruction(text, position, tokens):
            """Record the instruction name and look up its configuration."""
            name = tokens[0]
            self.currentInstructionName = name

            if name not in INSTRUCTION_CONFIG_LIST:
                raise ParseFatalException(DOCKERFILE_ERROR[211], loc=position)

            self.currentInstruction = INSTRUCTION_CONFIG_LIST[name]

        def reject_table_form(text, position, tokens):
            """Fail when the instruction's form flag is 1 (error 213)."""
            if self.currentInstruction[0] == 1:
                raise ParseFatalException(DOCKERFILE_ERROR[213], loc=position)

        def reject_list_form(text, position, tokens):
            """Fail when the instruction's form flag is 2 (error 214)."""
            if self.currentInstruction[0] == 2:
                raise ParseFatalException(DOCKERFILE_ERROR[214], loc=position)

        def check_argument_count(text, position, tokens):
            """Check the argument count against the configured bounds."""
            lower = self.currentInstruction[1]
            upper = self.currentInstruction[2]
            count = len(tokens)
            if lower <= count <= upper:
                return
            raise ParseFatalException(
                DOCKERFILE_ERROR[215].format(
                    nombre=count, min=lower, max=upper),
                loc=position)

        def check_option(text, position, tokens):
            """Check the option is known and allowed for the instruction."""
            option = tokens[0]
            if option not in OPTIONAL_OPTION_CONFIG:
                raise ParseFatalException(
                    DOCKERFILE_ERROR[216].format(opt=option), loc=position)

            allowed_for = OPTIONAL_OPTION_CONFIG[option]
            if self.currentInstructionName not in allowed_for:
                raise ParseFatalException(
                    DOCKERFILE_ERROR[217].format(opt=option), loc=position)

        # Newlines must stay significant, so only space and tab are skipped.
        ParserElement.setDefaultWhitespaceChars(" \t")

        # ---- Terminals

        instruction_name = Regex(r'([A-Z]+)(?<!\s)').setName('Instruction')
        instruction_name.setParseAction(remember_instruction)

        option = Regex(r'--[a-z]+=').setName('Option')
        option.setParseAction(check_option)

        quoted = Regex(r'\"((.|\s)+?)\"').setName("chaîne de caractère")
        bare_argument = Regex(r'\S+').setName("argument")

        end_of_line = lineEnd().setName("fin de ligne").suppress()
        comment = Regex(r'#.*').suppress()

        open_bracket = Literal('[').suppress()
        close_bracket = Literal(']').suppress()
        comma = Literal(',').suppress()

        # ---- Non-terminals

        # Arguments in bracketed form.
        table_arguments = (open_bracket - quoted -
                           ZeroOrMore(comma - quoted) - close_bracket)
        table_arguments.setName('["argument1", "argument2" …]')
        table_arguments.setParseAction(reject_table_form)

        # Arguments in bare, whitespace separated form.
        list_arguments = bare_argument - ZeroOrMore(bare_argument)
        list_arguments.setName('argument1 argument2 …')
        list_arguments.setParseAction(reject_list_form)

        arguments = (table_arguments | list_arguments)
        arguments.setParseAction(check_argument_count)

        # Separator used to continue an instruction on the next line.
        line_continuation = '\\' - end_of_line

        # Options: one or more "--name=" each followed by one argument.
        options = OneOrMore(option - Group(bare_argument))
        options.setParseAction(check_option)

        # A complete instruction.
        full_instruction = (instruction_name - Group(Optional(options)) -
                            Group(arguments))
        full_instruction = full_instruction.setParseAction(
            validate_instruction)

        # Grammar of a whole line.
        line_grammar = (stringStart -
                        (comment | Optional(full_instruction)) -
                        end_of_line - stringEnd())
        line_grammar = line_grammar.setFailAction(report_failure)

        line_grammar.ignore(line_continuation)

        return line_grammar
Exemplo n.º 24
0
class SyslogParser(text_parser.PyparsingMultiLineTextParser):
    """Parses syslog formatted log files.

    The grammars defined on this class recognise:

    * traditional syslog lines that start with a month/day/time stamp,
      including the special case where the reporter is the literal "kernel";
    * syslog comment lines of the form "<date>: --- text ---";
    * Chrome OS syslog lines that start with an ISO 8601 date and time
      followed by a severity keyword.

    Traditional lines carry no year, so the year is tracked across records
    by _UpdateYear().
    """
    NAME = u'syslog'

    DESCRIPTION = u'Syslog Parser'

    _ENCODING = u'utf-8'

    _plugin_classes = {}

    # The reporter and facility fields can contain any printable character, but
    # to allow for processing of syslog formats that delimit the reporter and
    # facility with printable characters, we remove certain common delimiters
    # from the set of printable characters.
    _REPORTER_CHARACTERS = u''.join(
        [c for c in pyparsing.printables if c not in [u':', u'[', u'<']])
    _FACILITY_CHARACTERS = u''.join(
        [c for c in pyparsing.printables if c not in [u':', u'>']])

    _SYSLOG_SEVERITY = [
        u'EMERG', u'ALERT', u'CRIT', u'ERR', u'WARNING', u'NOTICE', u'INFO',
        u'DEBUG'
    ]

    _OFFSET_PREFIX = [u'-', u'+']

    # Lazily matches the message body up to (but not including) the end of
    # the text or a newline that is followed by the start of the next record,
    # in either the traditional or the ISO 8601 date layout.
    # NOTE(review): the character class [\+|-] also matches a literal "|";
    # [+-] was probably intended. Left unchanged to preserve matching.
    _BODY_CONTENT = (r'.*?(?=($|\n\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2})|'
                     r'($|\n\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{6}'
                     r'[\+|-]\d{2}:\d{2}\s))')

    # Used by VerifyStructure() to recognise a traditional syslog line.
    _VERIFICATION_REGEX = \
        re.compile(r'^\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2}\s' +
                   _BODY_CONTENT)

    # The Chrome OS syslog messages are of a format beginning with an
    # ISO 8601 combined date and time expression with timezone designator:
    #   2016-10-25T12:37:23.297265-07:00
    #
    # This will then be followed by the SYSLOG Severity which will be one of:
    #   EMERG,ALERT,CRIT,ERR,WARNING,NOTICE,INFO,DEBUG
    #
    # 2016-10-25T12:37:23.297265-07:00 INFO
    _CHROMEOS_VERIFICATION_REGEX = \
        re.compile(r'^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.'
                   r'\d{6}[\+|-]\d{2}:\d{2}\s'
                   r'(EMERG|ALERT|CRIT|ERR|WARNING|NOTICE|INFO|DEBUG)' +
                   _BODY_CONTENT)

    # Building blocks shared by the line grammars below. Each entry attaches
    # a results name so ParseRecord() can read the value as an attribute of
    # the parsed structure.
    _PYPARSING_COMPONENTS = {
        u'year':
        text_parser.PyparsingConstants.FOUR_DIGITS.setResultsName(u'year'),
        u'two_digit_month':
        (text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
            u'two_digit_month')),
        u'month':
        text_parser.PyparsingConstants.MONTH.setResultsName(u'month'),
        u'day':
        text_parser.PyparsingConstants.ONE_OR_TWO_DIGITS.setResultsName(
            u'day'),
        u'hour':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(u'hour'),
        u'minute':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(u'minute'),
        u'second':
        text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(u'second'),
        u'fractional_seconds':
        pyparsing.Word(pyparsing.nums).setResultsName(u'fractional_seconds'),
        u'hostname':
        pyparsing.Word(pyparsing.printables).setResultsName(u'hostname'),
        u'reporter':
        pyparsing.Word(_REPORTER_CHARACTERS).setResultsName(u'reporter'),
        u'pid':
        text_parser.PyparsingConstants.PID.setResultsName(u'pid'),
        u'facility':
        pyparsing.Word(_FACILITY_CHARACTERS).setResultsName(u'facility'),
        u'severity':
        pyparsing.oneOf(_SYSLOG_SEVERITY).setResultsName(u'severity'),
        u'body':
        pyparsing.Regex(_BODY_CONTENT, re.DOTALL).setResultsName(u'body'),
        # The comment text is exposed under the same "body" results name as
        # a regular message body.
        u'comment_body':
        pyparsing.SkipTo(u' ---').setResultsName(u'body'),
        u'iso_8601_offset':
        (pyparsing.oneOf(_OFFSET_PREFIX) +
         text_parser.PyparsingConstants.TWO_DIGITS + pyparsing.Optional(
             pyparsing.Literal(u':') +
             text_parser.PyparsingConstants.TWO_DIGITS))
    }

    # Traditional date: month, day, HH:MM:SS and optional fractional seconds.
    _PYPARSING_COMPONENTS[u'date'] = (
        _PYPARSING_COMPONENTS[u'month'] + _PYPARSING_COMPONENTS[u'day'] +
        _PYPARSING_COMPONENTS[u'hour'] + pyparsing.Suppress(u':') +
        _PYPARSING_COMPONENTS[u'minute'] + pyparsing.Suppress(u':') +
        _PYPARSING_COMPONENTS[u'second'] + pyparsing.Optional(
            pyparsing.Suppress(u'.') +
            _PYPARSING_COMPONENTS[u'fractional_seconds']))

    # ISO 8601 date and time, combined into a single string token so it can
    # be passed as-is to the time string parser in ParseRecord().
    _PYPARSING_COMPONENTS[u'iso_8601_date'] = pyparsing.Combine(
        _PYPARSING_COMPONENTS[u'year'] + pyparsing.Literal(u'-') +
        _PYPARSING_COMPONENTS[u'two_digit_month'] + pyparsing.Literal(u'-') +
        _PYPARSING_COMPONENTS[u'day'] + pyparsing.Literal(u'T') +
        _PYPARSING_COMPONENTS[u'hour'] + pyparsing.Literal(u':') +
        _PYPARSING_COMPONENTS[u'minute'] + pyparsing.Literal(u':') +
        _PYPARSING_COMPONENTS[u'second'] + pyparsing.Literal(u'.') +
        _PYPARSING_COMPONENTS[u'fractional_seconds'] +
        _PYPARSING_COMPONENTS[u'iso_8601_offset'],
        joinString=u'',
        adjacent=True).setResultsName(u'iso_8601_date')

    _CHROMEOS_SYSLOG_LINE = (
        _PYPARSING_COMPONENTS[u'iso_8601_date'] +
        _PYPARSING_COMPONENTS[u'severity'] +
        _PYPARSING_COMPONENTS[u'reporter'] +
        pyparsing.Optional(pyparsing.Suppress(u':')) + pyparsing.Optional(
            pyparsing.Suppress(u'[') + _PYPARSING_COMPONENTS[u'pid'] +
            pyparsing.Suppress(u']')) +
        pyparsing.Optional(pyparsing.Suppress(u':')) +
        _PYPARSING_COMPONENTS[u'body'] + pyparsing.lineEnd())

    _SYSLOG_LINE = (
        _PYPARSING_COMPONENTS[u'date'] + _PYPARSING_COMPONENTS[u'hostname'] +
        _PYPARSING_COMPONENTS[u'reporter'] + pyparsing.Optional(
            pyparsing.Suppress(u'[') + _PYPARSING_COMPONENTS[u'pid'] +
            pyparsing.Suppress(u']')) + pyparsing.Optional(
                pyparsing.Suppress(u'<') + _PYPARSING_COMPONENTS[u'facility'] +
                pyparsing.Suppress(u'>')) +
        pyparsing.Optional(pyparsing.Suppress(u':')) +
        _PYPARSING_COMPONENTS[u'body'] + pyparsing.lineEnd())

    _SYSLOG_COMMENT = (_PYPARSING_COMPONENTS[u'date'] +
                       pyparsing.Suppress(u':') + pyparsing.Suppress(u'---') +
                       _PYPARSING_COMPONENTS[u'comment_body'] +
                       pyparsing.Suppress(u'---') + pyparsing.LineEnd())

    # Kernel lines have no hostname field; the reporter is the literal
    # "kernel".
    _KERNEL_SYSLOG_LINE = (
        _PYPARSING_COMPONENTS[u'date'] +
        pyparsing.Literal(u'kernel').setResultsName(u'reporter') +
        pyparsing.Suppress(u':') + _PYPARSING_COMPONENTS[u'body'] +
        pyparsing.lineEnd())

    # Pairs of (structure key, grammar). Regular and kernel lines share the
    # "syslog_line" key and are therefore handled identically by
    # ParseRecord().
    LINE_STRUCTURES = [(u'syslog_line', _SYSLOG_LINE),
                       (u'syslog_line', _KERNEL_SYSLOG_LINE),
                       (u'syslog_comment', _SYSLOG_COMMENT),
                       (u'chromeos_syslog_line', _CHROMEOS_SYSLOG_LINE)]

    _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

    def __init__(self):
        """Initializes a parser object."""
        super(SyslogParser, self).__init__()
        # A value of 0 means "not determined yet" for the three counters
        # below; they are filled in lazily by _UpdateYear().
        self._last_month = 0
        self._maximum_year = 0
        self._plugin_objects_by_reporter = {}
        self._year_use = 0

    def _UpdateYear(self, mediator, month):
        """Updates the year to use for events, based on last observed month.

        The year in use is incremented when the previously observed month is
        more than one month later than the current one, unless the year in
        use already equals the maximum year.

        Args:
          mediator (ParserMediator): mediates the interactions between
              parsers and other components, such as storage and abort signals.
          month (int): month observed by the parser, where January is 1.
        """
        if not self._year_use:
            self._year_use = mediator.GetEstimatedYear()
        if not self._maximum_year:
            self._maximum_year = mediator.GetLatestYear()

        if not self._last_month:
            # First record seen: nothing to compare against yet.
            self._last_month = month
            return

        # Some syslog daemons allow out-of-order sequences, so allow some leeway
        # to not cause Apr->May->Apr to cause the year to increment.
        # See http://bugzilla.adiscon.com/show_bug.cgi?id=527
        if self._last_month > (month + 1):
            if self._year_use != self._maximum_year:
                self._year_use += 1
        self._last_month = month

    def EnablePlugins(self, plugin_includes):
        """Enables parser plugins and indexes them by reporter.

        Args:
          plugin_includes (list[str]): names of the plugins to enable, where
              None or an empty list represents all plugins. Note that the
              default plugin is handled separately.
        """
        super(SyslogParser, self).EnablePlugins(plugin_includes)

        # Rebuild the lookup table used by ParseRecord() to find the plugin
        # responsible for a given reporter.
        self._plugin_objects_by_reporter = {}
        for plugin_object in self._plugin_objects:
            self._plugin_objects_by_reporter[
                plugin_object.REPORTER] = plugin_object

    def ParseRecord(self, mediator, key, structure):
        """Parses a matching entry.

        A plugin registered for the reporter of the line gets the first
        chance to process it. A generic event is produced when there is no
        such plugin, when the plugin raises WrongPlugin, or when the line is
        a syslog comment.

        Args:
          mediator (ParserMediator): mediates the interactions between
              parsers and other components, such as storage and abort signals.
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): elements parsed from the file.

        Raises:
          ParseError: when the structure type is unknown.
        """
        if key not in self._SUPPORTED_KEYS:
            raise errors.ParseError(
                u'Unable to parse record, unknown structure: {0:s}'.format(
                    key))

        if key == u'chromeos_syslog_line':
            timestamp = timelib.Timestamp.FromTimeString(
                structure.iso_8601_date[0])
        else:
            month = timelib.MONTH_DICT.get(structure.month.lower(), None)
            if not month:
                # Report the text that was parsed. The lookup result is None
                # here, which cannot be formatted with the "s" format
                # specifier and would not identify the offending value.
                mediator.ProduceParserError(
                    u'Invalid month value: {0:s}'.format(structure.month))
                return

            self._UpdateYear(mediator, month)
            timestamp = timelib.Timestamp.FromTimeParts(
                year=self._year_use,
                month=month,
                day=structure.day,
                hour=structure.hour,
                minutes=structure.minute,
                seconds=structure.second,
                timezone=mediator.timezone)

        plugin_object = None
        if key == u'syslog_comment':
            event_data = SyslogCommentEventData()
            event_data.body = structure.body
            # TODO: pass line number to offset or remove.
            event_data.offset = 0

        else:
            event_data = SyslogLineEventData()
            event_data.body = structure.body
            # Not every line grammar defines a hostname; store None rather
            # than an empty value in that case.
            event_data.hostname = structure.hostname or None
            # TODO: pass line number to offset or remove.
            event_data.offset = 0
            event_data.pid = structure.pid
            event_data.reporter = structure.reporter
            event_data.severity = structure.severity

            plugin_object = self._plugin_objects_by_reporter.get(
                structure.reporter, None)
            if plugin_object:
                attributes = {
                    u'hostname': structure.hostname,
                    u'severity': structure.severity,
                    u'reporter': structure.reporter,
                    u'pid': structure.pid,
                    u'body': structure.body
                }

                try:
                    # TODO: pass event_data instead of attributes.
                    plugin_object.Process(mediator, timestamp, attributes)

                except errors.WrongPlugin:
                    # The plugin declined the line; fall back to the generic
                    # event below.
                    plugin_object = None

        if not plugin_object:
            event = time_events.TimestampEvent(
                timestamp, eventdata.EventTimestamp.WRITTEN_TIME)
            mediator.ProduceEventWithEventData(event, event_data)

    def VerifyStructure(self, unused_mediator, line):
        """Verifies that this is a syslog-formatted file.

        Args:
          unused_mediator (ParserMediator): mediates the interactions between
              parsers and other components, such as storage and abort
              signals. Not used by this method.
          line (str): single line from the text file.

        Returns:
          bool: whether the line appears to contain syslog content, in either
              the traditional or the Chrome OS layout.
        """
        return (re.match(self._VERIFICATION_REGEX, line) or re.match(
            self._CHROMEOS_VERIFICATION_REGEX, line)) is not None
Exemplo n.º 25
0
class Exim4Parser(text_parser.PyparsingSingleLineTextParser):
    """Parser for exim4 log files.

    A supported line starts with a "YYYY-MM-DD HH:MM:SS" timestamp, optionally
    followed by a colon, and the remainder of the line is taken as the body.
    """
    NAME = u'exim4'

    DESCRIPTION = u'Exim4 Parser'

    _ENCODING = u'utf-8'

    # Used by VerifyStructure(): a line must open with a full date and time
    # followed by whitespace.
    _VERIFICATION_REGEX = re.compile(
        r'^\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}\s')

    _plugin_classes = {}

    # Printable characters minus common delimiters of the reporter and
    # facility fields.
    # NOTE(review): neither set is referenced by the grammar defined in this
    # class.
    _REPORTER_CHARACTERS = u''.join(
        character for character in pyparsing.printables
        if character not in u':[<')
    _FACILITY_CHARACTERS = u''.join(
        character for character in pyparsing.printables
        if character not in u':>')

    # Named grammar elements; the results names are what ParseRecord() reads
    # from the parsed structure.
    _PYPARSING_COMPONENTS = {
        u'year': text_parser.PyparsingConstants.YEAR.setResultsName(u'year'),
        u'month': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
            u'month'),
        u'day': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
            u'day'),
        u'hour': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
            u'hour'),
        u'minute': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
            u'minute'),
        u'second': text_parser.PyparsingConstants.TWO_DIGITS.setResultsName(
            u'second'),
        u'body': pyparsing.Regex(
            r'.*?(?=($|\n\w{3}\s+\d{1,2}\s\d{2}:\d{2}:\d{2}))',
            re.DOTALL).setResultsName(u'body'),
    }

    # year-month-day hour:minute:second, with the separators dropped from the
    # parse results.
    _PYPARSING_COMPONENTS[u'date'] = (
        _PYPARSING_COMPONENTS[u'year'] +
        pyparsing.Suppress(u'-') +
        _PYPARSING_COMPONENTS[u'month'] +
        pyparsing.Suppress(u'-') +
        _PYPARSING_COMPONENTS[u'day'] +
        _PYPARSING_COMPONENTS[u'hour'] +
        pyparsing.Suppress(u':') +
        _PYPARSING_COMPONENTS[u'minute'] +
        pyparsing.Suppress(u':') +
        _PYPARSING_COMPONENTS[u'second'])

    _EXIM4_LINE = (
        _PYPARSING_COMPONENTS[u'date'] +
        pyparsing.Optional(pyparsing.Suppress(u':')) +
        _PYPARSING_COMPONENTS[u'body'] +
        pyparsing.lineEnd())

    LINE_STRUCTURES = [(u'exim4_line', _EXIM4_LINE)]

    _SUPPORTED_KEYS = frozenset([key for key, _ in LINE_STRUCTURES])

    def __init__(self):
        """Initializes a parser object."""
        super(Exim4Parser, self).__init__()
        self._plugin_objects_by_reporter = {}
        # NOTE(review): the three counters below are initialised but not read
        # anywhere in this class.
        self._last_month = 0
        self._maximum_year = 0
        self._year_use = 0

    def EnablePlugins(self, plugin_includes):
        """Enables parser plugins and indexes them by their reporter.

        Args:
          plugin_includes (list[str]): names of the plugins to enable, where
              None or an empty list represents all plugins. Note that the
              default plugin is handled separately.
        """
        super(Exim4Parser, self).EnablePlugins(plugin_includes)

        self._plugin_objects_by_reporter = {}
        self._plugin_objects_by_reporter.update(
            (plugin.REPORTER, plugin) for plugin in self._plugin_objects)

    def ParseRecord(self, mediator, key, structure):
        """Turns a parsed line into an event.

        The line is offered to the plugin registered for its reporter, if
        there is one. A generic Exim4LineEvent is produced when no plugin is
        registered or when the plugin raises WrongPlugin.

        Args:
          mediator (ParserMediator): mediates the interactions between
              parsers and other components, such as storage and abort signals.
          key (str): name of the parsed structure.
          structure (pyparsing.ParseResults): elements parsed from the file.

        Raises:
          UnableToParseFile: if an unsupported key is provided.
        """
        if key not in self._SUPPORTED_KEYS:
            raise errors.UnableToParseFile(
                u'Unsupported key: {0:s}'.format(key))

        timestamp = timelib.Timestamp.FromTimeParts(
            year=structure.year,
            month=structure.month,
            day=structure.day,
            hour=structure.hour,
            minutes=structure.minute,
            seconds=structure.second,
            timezone=mediator.timezone)

        handler = self._plugin_objects_by_reporter.get(
            structure.reporter, None)
        attributes = {u'body': structure.body}

        if handler:
            try:
                handler.Process(mediator, timestamp, attributes)

            except errors.WrongPlugin:
                # The plugin declined the line; use the generic event instead.
                handler = None

        if not handler:
            mediator.ProduceEvent(Exim4LineEvent(timestamp, 0, attributes))

    def VerifyStructure(self, unused_mediator, line):
        """Verifies that this is an exim4-formatted file.

        Args:
          unused_mediator (ParserMediator): mediates the interactions between
              parsers and other components, such as storage and abort
              signals. Not used by this method.
          line (str): single line from the text file.

        Returns:
          bool: whether the line starts with a "YYYY-MM-DD HH:MM:SS"
              timestamp followed by whitespace.
        """
        return self._VERIFICATION_REGEX.match(line) is not None