def ParseTime(self, match=None, **unused_kwargs):
  """Parses the log timestamp from a regular expression match.

  Args:
    match: the regular expression match object, where match.group(1)
        contains the number of seconds after the POSIX epoch and
        match.group(2) presumably contains milliseconds (it is scaled
        by 1000 before being added to the timestamp).

  Raises:
    ParseError: when the timestamp values cannot be converted to integers.
  """
  # TODO: do something with match.group(3) ?
  try:
    number_of_seconds = int(match.group(1), 10)
    timestamp = timelib.Timestamp.FromPosixTime(number_of_seconds)
    # Scale match.group(2) by 1000, presumably milliseconds to
    # microseconds - TODO confirm against the log format.
    timestamp += int(match.group(2), 10) * 1000
    self.timestamp = timestamp
  except ValueError as exception:
    # Use the !s conversion instead of the :s format specifier, since
    # exception objects do not support a non-empty format specification
    # in Python 3; this also matches the style used elsewhere in the file.
    logging.error(
        u'Unable to retrieve timestamp with error: {0!s}'.format(
            exception))
    self.timestamp = 0
    raise errors.ParseError(u'Not a valid timestamp.')
def StringEscape(self, string, match, **unused_kwargs):
  """Escapes backslashes found inside a string quote.

  Backslashes followed by anything other than [\'"rnbt.ws] will raise
  an Error.

  Args:
    string: the string that matched.
    match: the match object (instance of re.MatchObject), where
        match.group(1) contains the escaped code.

  Raises:
    ParseError: when the escaped string is not one of [\'"rnbt].
  """
  # Accepted escape codes: backslash, single quote, double quote,
  # r, n, b, t, dot, w and s.
  if match.group(1) in '\\\'"rnbt\\.ws':
    # NOTE(review): 'string_escape' is a Python 2 only codec and str has
    # no decode method in Python 3 - confirm the intended runtime.
    self.string += string.decode('string_escape')
  else:
    raise errors.ParseError('Invalid escape character {0:s}.'.format(string))
def ParseRecord(self, parser_mediator, key, structure):
  """Parses a log record structure and produces events.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    key (str): identifier of the structure of tokens.
    structure (pyparsing.ParseResults): structure of tokens derived from
        a line of a text file.

  Raises:
    ParseError: when the structure type is unknown.
  """
  if key in ('logline', 'repeated'):
    self._ParseLogLine(parser_mediator, structure, key)
    return

  raise errors.ParseError(
      'Unable to parse record, unknown structure: {0:s}'.format(key))
def Compile(self, filter_implementation):
  """Compiles the filter implementation.

  Args:
    filter_implementation: a filter object (instance of objectfilter.TODO).

  Returns:
    A filter operator (instance of TODO).

  Raises:
    ParseError: if an unknown operator is provided.
  """
  # Map the source attribute name onto its internal equivalent if one
  # is defined in the swap table.
  self.attribute = self.swap_source.get(self.attribute, self.attribute)
  arguments = [self.attribute]
  op_str = self.operator.lower()
  operator = filter_implementation.OPS.get(op_str, None)

  if not operator:
    raise errors.ParseError('Unknown operator {0:s} provided.'.format(
        self.operator))

  # Plaso specific implementation - if we are comparing a timestamp
  # to a value, we use our specific implementation that compares
  # timestamps in a "human readable" format.
  if self.attribute == 'timestamp':
    args = []
    for arg in self.args:
      args.append(DateCompareObject(arg))
    self.args = args

  # Register date bounds with the global time range cache so other
  # components can narrow their scan window.
  for arg in self.args:
    if isinstance(arg, DateCompareObject):
      if 'Less' in str(operator):
        TimeRangeCache.SetUpperTimestamp(arg.data)
      else:
        TimeRangeCache.SetLowerTimestamp(arg.data)
  arguments.extend(self.args)

  expander = filter_implementation.FILTERS['ValueExpander']
  ops = operator(arguments=arguments, value_expander=expander)
  # A negated expression (e.g. "not contains") flips the operator result
  # when the operator supports it.
  if not self.bool_value:
    if hasattr(ops, 'FlipBool'):
      ops.FlipBool()

  return ops
def ParseCacheEntry(self, file_object, block_offset):
  """Parses a cache entry.

  Args:
    file_object (dfvfs.FileIO): a file-like object to read from.
    block_offset (int): block offset of the cache entry.

  Returns:
    CacheEntry: cache entry.

  Raises:
    ParseError: if the original URL in the cache entry key cannot be
        decoded.
    UnableToParseFile: if the cache entry structure cannot be read.
  """
  cache_entry_map = self._GetDataTypeMap('chrome_cache_entry')

  try:
    cache_entry, _ = self._ReadStructureFromFileObject(
        file_object, block_offset, cache_entry_map)
  except (ValueError, errors.ParseError) as exception:
    raise errors.UnableToParseFile((
        'Unable to parse cache entry at offset: 0x{0:08x} with error: '
        '{1!s}').format(block_offset, exception))

  cache_entry_object = CacheEntry()
  cache_entry_object.hash = cache_entry.hash
  cache_entry_object.next = CacheAddress(cache_entry.next_address)
  cache_entry_object.rankings_node = CacheAddress(
      cache_entry.rankings_node_address)
  cache_entry_object.creation_time = cache_entry.creation_time

  byte_array = cache_entry.key
  byte_string = bytes(bytearray(byte_array))
  # The key is NUL-terminated; keep only the bytes before the first
  # NUL character.
  cache_entry_object.key, _, _ = byte_string.partition(b'\x00')

  try:
    cache_entry_object.original_url = cache_entry_object.key.decode(
        'ascii')
  except UnicodeDecodeError as exception:
    raise errors.ParseError(
        'Unable to decode original URL in key with error: {0!s}'.format(
            exception))

  return cache_entry_object
def ParseRecord(self, parser_mediator, key, structure):
  """Parses a log record structure and produces events.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    key (str): name of the parsed structure.
    structure (pyparsing.ParseResults): structure parsed from the log file.

  Raises:
    ParseError: when the structure type is unknown.
  """
  if key not in self._SUPPORTED_KEYS:
    raise errors.ParseError(
        'Unable to parse record, unknown structure: {0:s}'.format(key))

  # TODO: Add anomaly objects for abnormal timestamps, such as when the log
  # timestamp is greater than the session start.
  if key == 'logline':
    self._ParseLogLine(parser_mediator, structure)
    return

  if not structure.timestamp:
    logging.debug('[{0:s}] {1:s} with invalid timestamp.'.format(
        self.NAME, key))
    return

  event_data = PopularityContestSessionEventData()
  event_data.session = '{0!s}'.format(structure.session)

  if key == 'header':
    event_data.details = structure.details
    event_data.hostid = structure.id
    event_data.status = 'start'
  elif key == 'footer':
    event_data.status = 'end'

  date_time = dfdatetime_posix_time.PosixTime(
      timestamp=structure.timestamp)
  event = time_events.DateTimeValuesEvent(
      date_time, definitions.TIME_DESCRIPTION_ADDED)
  parser_mediator.ProduceEventWithEventData(event, event_data)
def InsertFloatArg(self, string='', **unused_kwargs):
  """Inserts a floating-point argument into the current expression.

  Args:
    string (Optional[str]): argument string that contains a floating-point
        value.

  Returns:
    str: state or None if the argument could not be added to the current
        expression.

  Raises:
    ParseError: if string cannot be converted to a float.
  """
  try:
    value = float(string)
  except (TypeError, ValueError):
    raise errors.ParseError('{0:s} is not a valid float.'.format(string))
  return self.InsertArg(value)
def ParseFileObject(self, parser_mediator, file_object):
  """Parses a file-like object.

  Args:
    parser_mediator (ParserMediator): a parser mediator.
    file_object (dfvfs.FileIO): a file-like object to parse.

  Raises:
    ParseError: when the file cannot be parsed.
  """
  try:
    self._ParseFileHeader(file_object)
  except errors.ParseError as parse_error:
    message = 'Unable to parse index file header with error: {0!s}'.format(
        parse_error)
    raise errors.ParseError(message)

  # Skip over the LRU data, which is 112 bytes in size.
  file_object.seek(112, os.SEEK_CUR)
  self._ParseIndexTable(file_object)
def _ParseBooleanValue(self, byte_stream): """Parses a boolean value. Args: byte_stream (bytes): byte stream. Returns: bool: boolean value. Raises: ParseError: when the boolean value cannot be parsed. """ if byte_stream == b'\x00': return False if byte_stream == b'\x01': return True raise errors.ParseError('Unsupported boolean value.')
def InsertIntArg(self, string='', **unused_kwargs):
  """Inserts a decimal integer argument into the current expression.

  Args:
    string (Optional[str]): argument string that contains an integer value
        formatted in decimal.

  Returns:
    str: state or None if the argument could not be added to the current
        expression.

  Raises:
    ParseError: if string does not contain a valid integer.
  """
  try:
    value = int(string)
  except (TypeError, ValueError):
    raise errors.ParseError('{0:s} is not a valid integer.'.format(string))
  return self.InsertArg(value)
def _ReadPropertyPage(self, file_object, file_offset, property_table):
  """Reads a property page.

  Args:
    file_object (file): file-like object.
    file_offset (int): file offset.
    property_table (dict[int, object]): property table in which to store
        the property page values.

  Returns:
    tuple[spotlight_store_db_property_page_header, int]: page header and
        next property page block number.

  Raises:
    ParseError: if the property page cannot be read.
  """
  page_header, bytes_read = self._ReadPropertyPageHeader(
      file_object, file_offset)

  if page_header.property_table_type not in (0x00000011, 0x00000021,
      0x00000041, 0x00000081):
    raise errors.ParseError(
        'Unsupported property table type: 0x{0:08x}'.format(
            page_header.property_table_type))

  # The remainder of the page after the header contains the values.
  page_data = file_object.read(page_header.page_size - bytes_read)

  data_type_map = self._GetDataTypeMap(
      'spotlight_store_db_property_values_header')

  file_offset += bytes_read
  page_values_header = self._ReadStructureFromByteStream(
      page_data, file_offset, data_type_map)

  # NOTE(review): table type 0x00000041 passes the check above but has no
  # branch below, so its page values are skipped - confirm this is
  # intentional.
  if page_header.property_table_type in (0x00000011, 0x00000021):
    self._ReadPropertyPageValues(page_header, page_data, property_table)
  elif page_header.property_table_type == 0x00000081:
    self._ReadIndexPageValues(page_header, page_data, property_table)

  return page_header, page_values_header.next_block_number
def ParseRecord(self, parser_mediator, key, structure):
  """Parses a matching entry.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    key (str): name of the parsed structure.
    structure (pyparsing.ParseResults): elements parsed from the file.

  Raises:
    ParseError: when the structure type is unknown.
  """
  if key not in self._SUPPORTED_KEYS:
    raise errors.ParseError(
        'Unable to parse record, unknown structure: {0:s}'.format(key))

  date_time = dfdatetime_time_elements.TimeElements()

  try:
    iso_date_time = self._GetISO8601String(structure.date_time)
    date_time.CopyFromStringISO8601(iso_date_time)
  except ValueError:
    # An unparsable date time value is reported as an extraction error
    # and the record is skipped rather than failing the whole file.
    parser_mediator.ProduceExtractionError(
        'invalid date time value: {0!s}'.format(structure.date_time))
    return

  event = time_events.DateTimeValuesEvent(
      date_time, definitions.TIME_DESCRIPTION_RECORDED)

  event_data = ApacheAccessEventData()
  event_data.ip_address = structure.ip_address
  event_data.remote_name = structure.remote_name
  event_data.user_name = structure.user_name
  event_data.http_request = structure.http_request
  event_data.http_response_code = structure.response_code
  event_data.http_response_bytes = structure.response_bytes

  # The combined log format additionally contains the referer and
  # user agent fields.
  if key == 'combined_log_format':
    event_data.http_request_referer = structure.referer
    event_data.http_request_user_agent = structure.user_agent

  parser_mediator.ProduceEventWithEventData(event, event_data)
def AddArg(self, argument):
  """Adds a new argument to this expression.

  Args:
    argument (str): argument to add.

  Returns:
    bool: True if the argument is the last argument, False otherwise.

  Raises:
    ParseError: If there are too many arguments.
  """
  self.args.append(argument)
  number_of_arguments = len(self.args)
  if number_of_arguments > self.number_of_args:
    raise errors.ParseError('Too many arguments for this expression.')
  return number_of_arguments == self.number_of_args
def AddArg(self, arg):
  """Adds a new arg to this expression.

  Args:
    arg: The argument to add (string).

  Returns:
    True if this arg is the last arg, False otherwise.

  Raises:
    ParseError: If there are too many args.
  """
  self.args.append(arg)
  argument_count = len(self.args)
  if argument_count > self.number_of_args:
    raise errors.ParseError('Too many args for this expression.')
  return argument_count == self.number_of_args
def _PopState(self, **unused_kwargs): """Pops the previous state from the stack. Returns: str: next state, which is the previous state on the stack. Raises: ParseError: if the stack is empty. """ try: self._state = self._state_stack.pop() except IndexError: raise errors.ParseError(( 'Tried to pop state from an empty stack - possible recursion error ' 'at position {0!s}: {1!s} <---> {2!s} )').format( len(self._processed_buffer), self._processed_buffer, self._buffer)) logging.debug('Returned state to {0:s}'.format(self._state)) return self._state
def Compile(self):
  """Compiles the expression into a filter.

  Returns:
    Filter: filter object corresponding the expression.

  Raises:
    ParseError: if the operator is not supported.
  """
  operator = self.operator.lower()
  if operator in ('and', '&&'):
    filter_class = filters.AndFilter
  elif operator in ('or', '||'):
    filter_class = filters.OrFilter
  else:
    # Fixed typo in the error message: 'Unusupported' -> 'Unsupported'.
    raise errors.ParseError(
        'Unsupported operator: {0:s}.'.format(operator))

  args = [argument.Compile() for argument in self.args]
  return filter_class(arguments=args)
def ParseRecord(self, parser_mediator, key, structure):
  """Parses a log record structure and produces events.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    key (str): name of the parsed structure.
    structure (pyparsing.ParseResults): structure parsed from the log file.

  Raises:
    ParseError: when the structure type is unknown.
  """
  # Dropped the u'' string prefixes for consistency with the rest of the
  # file; the runtime string values are identical.
  if key not in ('comment', 'logline'):
    raise errors.ParseError(
        'Unable to parse record, unknown structure: {0:s}'.format(key))

  if key == 'logline':
    self._ParseLogLine(parser_mediator, structure)
  elif key == 'comment':
    self._ParseComment(structure)
def _ReadMetadataAttributeStringValue(self, property_type, data):
  """Reads a metadata attribute string value.

  Args:
    property_type (int): metadata attribute property type.
    data (bytes): data.

  Returns:
    tuple[object, int]: value and number of bytes read.

  Raises:
    ParseError: if the metadata attribute string value cannot be read.
  """
  # The string data is preceded by a variable-size integer that contains
  # the size of the data.
  data_size, bytes_read = self._ReadVariableSizeInteger(data)

  data_type_map = self._GetDataTypeMap('array_of_cstring')

  context = dtfabric_data_maps.DataTypeMapContext(values={
      'elements_data_size': data_size})

  try:
    array_of_values = data_type_map.MapByteStream(
        data[bytes_read:bytes_read + data_size], context=context)
  except dtfabric_errors.MappingError as exception:
    raise errors.ParseError(
        'Unable to parse array of string values with error: {0!s}'.format(
            exception))

  if property_type & 0x03 == 0x03:
    value = array_of_values[0]
    # NOTE(review): presumably '\x16\x02' acts as a separator and only the
    # leading part of the value is relevant - confirm against the format
    # documentation.
    if '\x16\x02' in value:
      value = value.split('\x16\x02')[0]
  elif property_type & 0x03 == 0x02:
    # Multi-valued property: the full array is the value.
    value = array_of_values
  else:
    value = array_of_values[0]

  bytes_read += data_size
  return value, bytes_read
def _ReadMetadataAttributeFloat64Value(self, property_type, data):
  """Reads a metadata attribute 64-bit floating-point value.

  Args:
    property_type (int): metadata attribute property type.
    data (bytes): data.

  Returns:
    tuple[object, int]: value and number of bytes read.

  Raises:
    ParseError: if the metadata attribute 64-bit floating-point value
        cannot be read.
  """
  if property_type & 0x02 == 0x00:
    # A single 64-bit floating-point value with no preceding size integer.
    data_size, bytes_read = 8, 0
  else:
    # Multiple values are preceded by a variable-size integer that
    # contains the size of the data.
    data_size, bytes_read = self._ReadVariableSizeInteger(data)

  data_type_map = self._GetDataTypeMap('array_of_float64')

  context = dtfabric_data_maps.DataTypeMapContext(values={
      'elements_data_size': data_size})

  try:
    array_of_values = data_type_map.MapByteStream(
        data[bytes_read:bytes_read + data_size], context=context)
  except dtfabric_errors.MappingError as exception:
    raise errors.ParseError((
        'Unable to parse array of 64-bit floating-point values with error: '
        '{0!s}').format(exception))

  if bytes_read == 0:
    # Single value: unwrap it from the mapped array.
    value = array_of_values[0]
  else:
    value = array_of_values

  bytes_read += data_size
  return value, bytes_read
def _CheckSignature(self, value_data):
  """Parses and validates the signature.

  Args:
    value_data (bytes): value data.

  Returns:
    int: format type or None if format could not be determined.

  Raises:
    ParseError: if the value data could not be parsed.
  """
  signature_map = self._GetDataTypeMap('uint32le')

  try:
    signature = self._ReadStructureFromByteStream(
        value_data, 0, signature_map)
  except (ValueError, errors.ParseError) as exception:
    raise errors.ParseError(
        'Unable to parse signature value with error: {0!s}'.format(
            exception))

  format_type = self._HEADER_SIGNATURES.get(signature, None)

  if format_type == self._FORMAT_TYPE_2003:
    # TODO: determine which format version is used (2003 or Vista).
    return self._FORMAT_TYPE_2003

  if format_type == self._FORMAT_TYPE_8:
    # NOTE(review): the signature value is reused here as the offset of
    # the first cached entry - presumably it equals the header size for
    # these formats; confirm against the format documentation.
    cached_entry_signature = value_data[signature:signature + 4]
    if cached_entry_signature in (self._CACHED_ENTRY_SIGNATURE_8_0,
        self._CACHED_ENTRY_SIGNATURE_8_1):
      return self._FORMAT_TYPE_8

  elif format_type == self._FORMAT_TYPE_10:
    # Windows 10 uses the same cache entry signature as Windows 8.1
    cached_entry_signature = value_data[signature:signature + 4]
    if cached_entry_signature == self._CACHED_ENTRY_SIGNATURE_8_1:
      return self._FORMAT_TYPE_10

  return format_type
def _ReadAttributeValueBinaryData(
    self, attribute_values_data, record_offset, attribute_values_data_offset,
    attribute_value_offset):
  """Reads a binary data attribute value.

  Args:
    attribute_values_data (bytes): attribute values data.
    record_offset (int): offset of the record relative to the start of
        the file.
    attribute_values_data_offset (int): offset of the attribute values data
        relative to the start of the record.
    attribute_value_offset (int): offset of the attribute relative to
        the start of the record.

  Returns:
    bytes: binary data value or None if attribute value offset is not set.

  Raises:
    ParseError: if the attribute value cannot be read.
  """
  # An offset of 0 indicates the attribute value is not set.
  if attribute_value_offset == 0:
    return None

  data_type_map = self._GetDataTypeMap('keychain_blob')

  file_offset = (
      record_offset + attribute_values_data_offset + attribute_value_offset)

  # NOTE(review): the offset is rebased relative to the attribute values
  # data; the additional - 1 suggests stored offsets are 1-based - confirm
  # against the keychain format documentation.
  attribute_value_offset -= attribute_values_data_offset + 1
  attribute_value_data = attribute_values_data[attribute_value_offset:]

  try:
    string_attribute_value = self._ReadStructureFromByteStream(
        attribute_value_data, file_offset, data_type_map)
  except (ValueError, errors.ParseError) as exception:
    raise errors.ParseError((
        'Unable to map binary data attribute value data at offset: 0x{0:08x} '
        'with error: {1!s}').format(file_offset, exception))

  return string_attribute_value.blob
def _SetDateTimeDecimalInteger(self, string='', **unused_kwargs): """Sets a decimal integer argument to the datetime value. Note that this function is used as a callback by _GetNextToken. Args: string (Optional[str]): argument string that contains an integer value formatted in decimal. Returns: str: state. Raises: ParseError: if string does not contain a valid integer. """ try: self._datetime_value = int(string) except (TypeError, ValueError): raise errors.ParseError('{0:s} is not a valid integer.'.format(string)) return self._STATE_DATETIME
def HexEscape(self, string, match, **unused_kwargs):
  """Converts a hex escaped string.

  Note that this function is used as a callback by _GetNextToken.

  Returns:
    str: next state, which is None.

  Raises:
    ParseError: if the string is not hex escaped.
  """
  logging.debug('HexEscape matched {0:s}.'.format(string))
  hex_string = match.group(1)
  try:
    # Decode the hexadecimal digits to bytes, then interpret as UTF-8
    # and append to the string being built.
    decoded_bytes = binascii.unhexlify(hex_string)
    hex_string = codecs.decode(decoded_bytes, 'utf-8')
    self._string += hex_string
  except (TypeError, binascii.Error):
    raise errors.ParseError('Invalid hex escape {0!s}.'.format(hex_string))

  return None
def _AddArgumentFloatingPoint(self, string='', **unused_kwargs): """Adds a floating-point argument to the current expression. Note that this function is used as a callback by _GetNextToken. Args: string (Optional[str]): argument string that contains a floating-point value. Returns: str: state or None if the argument could not be added to the current expression. Raises: ParseError: if string does not contain a valid floating-point number. """ try: float_value = float(string) except (TypeError, ValueError): raise errors.ParseError('{0:s} is not a valid float.'.format(string)) return self._AddArgument(float_value)
def _AddArgumentDecimalInteger(self, string='', **unused_kwargs): """Adds a decimal integer argument to the current expression. Note that this function is used as a callback by _GetNextToken. Args: string (Optional[str]): argument string that contains an integer value formatted in decimal. Returns: str: state or None if the argument could not be added to the current expression. Raises: ParseError: if string does not contain a valid integer. """ try: int_value = int(string) except (TypeError, ValueError): raise errors.ParseError('{0:s} is not a valid integer.'.format(string)) return self._AddArgument(int_value)
def _ParseValue(self, registry_value): """Parses the registry value. Args: registry_value (bytes): value data. Returns: int: timestamp. Raises: ParseError: if the value data could not be parsed. """ try: timestamp = self._ReadStructureFromByteStream( registry_value, 0, self._GetDataTypeMap('filetime')) except (ValueError, errors.ParseError) as exception: raise errors.ParseError( 'Unable to parse timestamp with error: {0!s}'.format( exception)) return timestamp
def ParseRecord(self, parser_mediator, key, structure):
  """Parses a matching entry.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    key (str): name of the parsed structure.
    structure (pyparsing.ParseResults): elements parsed from the file.

  Raises:
    ParseError: when the structure type is unknown.
  """
  if key not in self._SUPPORTED_KEYS:
    raise errors.ParseError(
        'Unable to parse record, unknown structure: {0:s}'.format(key))

  if key == 'log_entry':
    self._ParseLine(parser_mediator, structure)
  elif key == 'greeting':
    self._ReadGreeting(structure)
def _ReadAttributeValueDateTime(
    self, attribute_values_data, record_offset, attribute_values_data_offset,
    attribute_value_offset):
  """Reads a date time attribute value.

  Args:
    attribute_values_data (bytes): attribute values data.
    record_offset (int): offset of the record relative to the start of
        the file.
    attribute_values_data_offset (int): offset of the attribute values data
        relative to the start of the record.
    attribute_value_offset (int): offset of the attribute relative to
        the start of the record.

  Returns:
    str: date and time values.

  Raises:
    ParseError: if the attribute value cannot be read.
  """
  # An offset of 0 indicates the attribute value is not set.
  if attribute_value_offset == 0:
    return None

  data_type_map = self._GetDataTypeMap('keychain_date_time')

  file_offset = (
      record_offset + attribute_values_data_offset + attribute_value_offset)

  # NOTE(review): the offset is rebased relative to the attribute values
  # data; the additional - 1 suggests stored offsets are 1-based - confirm
  # against the keychain format documentation.
  attribute_value_offset -= attribute_values_data_offset + 1
  attribute_value_data = attribute_values_data[attribute_value_offset:]

  try:
    date_time_attribute_value = self._ReadStructureFromByteStream(
        attribute_value_data, file_offset, data_type_map)
  except (ValueError, errors.ParseError) as exception:
    raise errors.ParseError((
        'Unable to map date time attribute value data at offset: 0x{0:08x} '
        'with error: {1!s}').format(file_offset, exception))

  # Strip trailing NUL padding from the stored date and time string.
  return date_time_attribute_value.date_time.rstrip('\x00')
def _ParseApplicationPasswordRecord(self, parser_mediator, record):
  """Extracts the information from an application password record.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    record (dict[str, object]): database record.

  Raises:
    ParseError: if the application password record key value does not
        start with "ssgp".
  """
  key = record.get('_key_', None)
  if not key or not key.startswith(b'ssgp'):
    raise errors.ParseError((
        'Unsupported application password record key value does not start '
        'with: "ssgp".'))

  event_data = KeychainApplicationRecordEventData()
  event_data.account_name = self._ParseBinaryDataAsString(
      parser_mediator, record['acct'])
  # NOTE(review): 'crtr' is usually the creator code in keychain records -
  # confirm it is the intended source of the comments attribute.
  event_data.comments = self._ParseBinaryDataAsString(
      parser_mediator, record['crtr'])
  event_data.entry_name = self._ParseBinaryDataAsString(
      parser_mediator, record['PrintName'])
  # Store the SSGP hash as a hexadecimal string, skipping the 'ssgp'
  # prefix of the key.
  ssgp_hash = codecs.encode(key[4:], 'hex')
  event_data.ssgp_hash = codecs.decode(ssgp_hash, 'utf-8')
  event_data.text_description = self._ParseBinaryDataAsString(
      parser_mediator, record['desc'])

  # Produce an event for the creation time, when present.
  date_time = self._ParseDateTimeValue(parser_mediator, record['cdat'])
  if date_time:
    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_CREATION)
    parser_mediator.ProduceEventWithEventData(event, event_data)

  # Produce an event for the modification time, when present.
  date_time = self._ParseDateTimeValue(parser_mediator, record['mdat'])
  if date_time:
    event = time_events.DateTimeValuesEvent(
        date_time, definitions.TIME_DESCRIPTION_MODIFICATION)
    parser_mediator.ProduceEventWithEventData(event, event_data)
def _ReadData(self, file_object, file_offset, data_size): """Reads data. Args: file_object (dvfvs.FileIO): a file-like object to read. file_offset (int): offset of the data relative to the start of the file-like object. data_size (int): size of the data. The resulting data size much match the requested data size so that dtFabric can map the data type definitions onto the byte stream. Returns: bytes: byte stream containing the data. Raises: ParseError: if the data cannot be read. ValueError: if the file-like object is missing. """ if not file_object: raise ValueError('Missing file-like object.') file_object.seek(file_offset, os.SEEK_SET) read_error = '' try: data = file_object.read(data_size) if len(data) != data_size: read_error = 'missing data' except IOError as exception: read_error = '{0!s}'.format(exception) if read_error: raise errors.ParseError( 'Unable to read data at offset: 0x{0:08x} with error: {1:s}'.format( file_offset, read_error)) return data