def CompileFilter(self, filter_string):
  """Compiles the filter string into an EventObjectFilter matcher.

  Args:
    filter_string: string that contains the filter expression, optionally
        extended with selective field selection.

  Raises:
    WrongPlugin: if the filter string is malformed or no filter could be
        compiled from it.
  """
  lex = SelectiveLexer(filter_string)

  _ = lex.NextToken()
  if lex.error:
    raise errors.WrongPlugin('Malformed filter string.')

  _ = lex.NextToken()
  if lex.error:
    raise errors.WrongPlugin('No fields defined.')

  # Compare the lexer state with equality (!=) instead of identity (is not):
  # identity comparison against a string literal depends on CPython string
  # interning and is not guaranteed to work. The redundant outer "if" around
  # the loop was removed since "while" already tests the condition first.
  while lex.state != 'END':
    _ = lex.NextToken()
    if lex.error:
      raise errors.WrongPlugin('No filter defined for DynamicFilter.')

  if lex.state != 'END':
    raise errors.WrongPlugin(
        'Malformed DynamicFilter, end state not reached.')

  self._fields = lex.fields
  self._limit = lex.limit
  self._separator = unicode(lex.separator)

  if lex.lex_filter:
    super(DynamicFilter, self).CompileFilter(lex.lex_filter)
  else:
    self._matcher = None
  self._filter_expression = filter_string
def _ParseEntry(self, entry):
  """Parses a single filter entry.

  Args:
    entry (dict[str, dict[str, object]]): dictionary containing one more
        filter rules and associated metadata.

  Raises:
    WrongPlugin: if the entry cannot be parsed.
  """
  # Every key in the entry names a rule; the value is its metadata, which
  # must carry a 'filter' statement.
  for rule_name, rule_metadata in entry.items():
    if 'filter' not in rule_metadata:
      raise errors.WrongPlugin(
          'Entry inside {0:s} does not contain a filter statement.'.format(
              rule_name))

    filter_definition = rule_metadata.get('filter')
    filter_matcher = self._GetMatcher(filter_definition)
    if not filter_matcher:
      raise errors.WrongPlugin(
          'Filter entry [{0:s}] malformed for rule: <{1:s}>'.format(
              filter_definition, rule_name))

    self.filters.append((rule_name, filter_matcher, rule_metadata))
def CompileFilter(self, filter_string):
  """Compile a set of ObjectFilters defined in an YAML file.

  Args:
    filter_string: path to the YAML file that contains the filter
        definitions.

  Raises:
    WrongPlugin: if the filter string is not a path to an existing file,
        the YAML file cannot be parsed or has the wrong structure.
  """
  if not os.path.isfile(filter_string):
    raise errors.WrongPlugin((
        'ObjectFilterList requires an YAML file to be passed on, this filter '
        'string is not a file.'))

  yaml.add_constructor(
      '!include', IncludeKeyword, Loader=yaml.loader.SafeLoader)
  results = None

  with open(filter_string, 'rb') as fh:
    try:
      results = yaml.safe_load(fh)
    except (yaml.scanner.ScannerError, IOError) as exception:
      raise errors.WrongPlugin(
          u'Unable to parse YAML file with error: {0:s}.'.format(exception))

  self.filters = []
  # Use isinstance() instead of exact type comparison (type(x) is dict).
  if isinstance(results, dict):
    self._ParseEntry(results)
  elif isinstance(results, list):
    for result in results:
      if not isinstance(result, dict):
        # Report the type of the offending entry, not the type of the
        # containing list, which the original message did by mistake.
        raise errors.WrongPlugin(
            u'Wrong format of YAML file, entry not a dict ({})'.format(
                type(result)))
      self._ParseEntry(result)
  else:
    raise errors.WrongPlugin(
        u'Wrong format of YAML file, entry not a dict ({})'.format(
            type(results)))
def Process(self, cookie_name=None, cookie_data=None, **kwargs):
  """Checks whether this plugin handles the given cookie and processes it.

  Args:
    cookie_name: The name of the cookie value.
    cookie_data: The cookie data, as a byte string.

  Returns:
    A generator that yields event objects.

  Raises:
    errors.WrongPlugin: If the cookie name differs from the one supplied
        in COOKIE_NAME.
    ValueError: If cookie_name or cookie_data are not set.
  """
  if cookie_name is None or cookie_data is None:
    raise ValueError(u'Cookie name or data are not set.')

  expected_name = self.COOKIE_NAME
  if cookie_name != expected_name:
    raise errors.WrongPlugin(
        u'Not the correct cookie plugin for: {} [{}]'.format(
            cookie_name, self.plugin_name))

  # Let the base class validate any remaining keyword arguments.
  super(CookiePlugin, self).Process(**kwargs)
  return self.GetEntries(cookie_data=cookie_data)
def Process(self, parser_mediator, date_time, syslog_tokens, **kwargs):
  """Processes the data structure produced by the parser.

  Tries each message grammar in turn; the first grammar that parses the
  message body wins and produces the event.

  Args:
    parser_mediator (ParserMediator): mediates interactions between
        parsers and other components, such as storage and dfvfs.
    date_time (dfdatetime.DateTimeValues): date and time values.
    syslog_tokens (dict[str, str]): names of the fields extracted by the
        syslog parser and the matching grammar, and values are the values
        of those fields.

  Raises:
    AttributeError: If the syslog_tokens do not include a 'body' attribute.
    WrongPlugin: If the plugin is unable to parse the syslog tokens.
  """
  body = syslog_tokens.get('body', None)
  if not body:
    raise AttributeError('Missing required attribute: body')

  for message_key, message_grammar in self.MESSAGE_GRAMMARS:
    try:
      structure = message_grammar.parseString(body)
      syslog_tokens.update(structure.asDict())
      self._ParseMessage(
          parser_mediator, message_key, date_time, syslog_tokens)
      return
    except pyparsing.ParseException:
      # This grammar did not match; try the next one.
      pass

  raise errors.WrongPlugin('Unable to create event from: {0:s}'.format(body))
def Process(self, parser_mediator, root_item, item_names, **kwargs):
  """Determine if this is the right plugin for this OLECF file.

  Compares the sub items found in the root of the OLECF file against the
  items this plugin requires; the plugin matches only when every required
  item is among the discovered items.

  Args:
    parser_mediator: A parser mediator object (instance of ParserMediator).
    root_item: Optional root item of the OLECF file.
    item_names: Optional list of all items discovered in the root.

  Raises:
    errors.WrongPlugin: If the set of required items is not a subset of the
        available items.
    ValueError: If the root_item or items are not set.
  """
  if root_item is None or item_names is None:
    raise ValueError(u'Root item or items are not set.')

  if not self.REQUIRED_ITEMS.issubset(item_names):
    raise errors.WrongPlugin(
        u'Not the correct items for: {0:s}'.format(self.NAME))

  # This will raise if unhandled keyword arguments are passed.
  super(OlecfPlugin, self).Process(parser_mediator)

  # Resolve every required item name, silently skipping names that have
  # no corresponding sub item.
  sub_items = (
      root_item.get_sub_item_by_name(name) for name in self.REQUIRED_ITEMS)
  items = [sub_item for sub_item in sub_items if sub_item]

  self.ParseItems(parser_mediator, root_item=root_item, items=items)
def GetEntries(self, parser_mediator, cookie_data=None, url=None, **unused_kwargs): """Extracts event objects from the cookie. Args: parser_mediator: A parser mediator object (instance of ParserMediator). cookie_data: The cookie data, as a byte string. url: The full URL or path where the cookie got set. """ # Values has the structure of: # <domain hash>.<pages viewed>.10.<last time> fields = cookie_data.split(u'.') # Check for a valid record. if len(fields) != 4: raise errors.WrongPlugin( u'Wrong number of fields. [{0:d} vs. 4]'.format(len(fields))) domain_hash, pages_viewed, _, last = fields event_object = GoogleAnalyticsEvent( int(last, 10), eventdata.EventTimestamp.LAST_VISITED_TIME, url, 'utmb', self.COOKIE_NAME, domain_hash=domain_hash, pages_viewed=int(pages_viewed, 10)) parser_mediator.ProduceEvent(event_object)
def Process(self, database=None, cache=None, **kwargs):
  """Determines if this is the appropriate plugin for the database.

  Args:
    database: Optional ESE database object (instance of pyesedb.file).
        The default is None.
    cache: Optional cache object (instance of EseDbCache). The default
        is None.

  Returns:
    A generator that yields event object.

  Raises:
    errors.WrongPlugin: If the database does not contain all the tables
        defined in the required_tables set.
    ValueError: If the database attribute is not valid.
  """
  if database is None:
    raise ValueError(u'Invalid database.')

  available_tables = frozenset(self._GetTableNames(database))
  missing_tables = self._required_tables.difference(available_tables)
  if missing_tables:
    raise errors.WrongPlugin(
        u'[{0:s}] required tables not found.'.format(self.plugin_name))

  super(EseDbPlugin, self).Process(**kwargs)
  return self.GetEntries(database=database, cache=cache, **kwargs)
def Process(self, parser_mediator, timestamp, syslog_tokens, **kwargs):
  """Processes the data structure produced by the parser.

  Args:
    parser_mediator: a parser mediator object (instance of ParserMediator).
    timestamp: the timestamp, which is an integer containing the number
        of micro seconds since January 1, 1970, 00:00:00 UTC or 0 on error.
    syslog_tokens: a dictionary whose keys are the names of the fields
        extracted by the syslog parser, and values are the values of those
        fields.

  Raises:
    AttributeError: If the syslog_tokens do not include a 'body' attribute.
    WrongPlugin: If the plugin is unable to parse the syslog tokens.
  """
  body = syslog_tokens.get(u'body', None)
  if not body:
    raise AttributeError(u'Missing required attribute: body')

  # The redundant iter() wrapper was removed: MESSAGE_GRAMMARS is iterated
  # directly by the for statement.
  for key, grammar in self.MESSAGE_GRAMMARS:
    try:
      tokens = grammar.parseString(body)
      syslog_tokens.update(tokens.asDict())
      self.ParseMessage(parser_mediator, key, timestamp, syslog_tokens)
      return
    except pyparsing.ParseException:
      # This grammar did not match; try the next one.
      pass

  raise errors.WrongPlugin(
      u'Unable to create event from: {0:s}'.format(body))
def _ParseEntry(self, entry):
  """Parse a single YAML filter entry.

  Args:
    entry: a dictionary whose keys are rule names and whose values are the
        filter metadata dictionaries, each expected to contain a 'filter'
        statement.

  Raises:
    WrongPlugin: if the entry does not contain a filter statement or the
        filter statement cannot be compiled into a matcher.
  """
  # A single file with a list of filters to parse.
  for name, meta in entry.items():
    if 'filter' not in meta:
      raise errors.WrongPlugin(
          u'Entry inside {} does not contain a filter statement.'.
          format(name))

    # Retrieve the filter definition once instead of calling get() twice.
    meta_filter = meta.get('filter')
    matcher = pfilter.GetMatcher(meta_filter, True)
    if not matcher:
      raise errors.WrongPlugin(
          u'Filter entry [{0:s}] malformed for rule: <{1:s}>'.format(
              meta_filter, name))

    self.filters.append((name, matcher, meta))
def Process(self, cache=None, database=None, **kwargs):
  """Determine if this is the right plugin for this database.

  The plugin matches when every table listed in REQUIRED_TABLES is
  present in the database; the matching plugin returns a generator that
  yields event objects.

  Args:
    cache: A SQLiteCache object.
    database: A database object (instance of SQLiteDatabase).

  Returns:
    A generator that yields event objects.

  Raises:
    errors.WrongPlugin: If the database does not contain all the tables
        defined in the REQUIRED_TABLES set.
    ValueError: If the database attribute is not passed in.
  """
  if database is None:
    raise ValueError(u'Database is not set.')

  if not self.REQUIRED_TABLES.issubset(database.tables):
    raise errors.WrongPlugin(
        u'Not the correct database tables for: {}'.format(
            self.plugin_name))

  super(SQLitePlugin, self).Process(**kwargs)
  return self.GetEntries(cache=cache, database=database)
def Process(self, parser_mediator, timestamp, syslog_tokens, **kwargs):
  """Processes the data structure produced by the parser.

  Args:
    parser_mediator (ParserMediator): mediates the interactions between
        parsers and other components, such as storage and abort signals.
    timestamp (int): number of micro seconds since January 1, 1970,
        00:00:00 UTC or 0 on error.
    syslog_tokens (dict[str, str]): names of the fields extracted by the
        syslog parser and the matching grammar, and values are the values
        of those fields.

  Raises:
    AttributeError: If the syslog_tokens do not include a 'body' attribute.
    WrongPlugin: If the plugin is unable to parse the syslog tokens.
  """
  body = syslog_tokens.get('body', None)
  if not body:
    raise AttributeError('Missing required attribute: body')

  # The redundant iter() wrapper was removed: MESSAGE_GRAMMARS is iterated
  # directly by the for statement.
  for key, grammar in self.MESSAGE_GRAMMARS:
    try:
      tokens = grammar.parseString(body)
      syslog_tokens.update(tokens.asDict())
      self.ParseMessage(parser_mediator, key, timestamp, syslog_tokens)
      return
    except pyparsing.ParseException:
      # This grammar did not match; try the next one.
      pass

  raise errors.WrongPlugin(
      'Unable to create event from: {0:s}'.format(body))
def Process(self, parser_mediator, cookie_name, cookie_data, url, **kwargs):
  """Checks whether this plugin handles the given cookie and processes it.

  Args:
    parser_mediator: A parser mediator object (instance of ParserMediator).
    cookie_name: The name of the cookie value.
    cookie_data: The cookie data, as a byte string.
    url: The full URL or path where the cookie got set.

  Raises:
    errors.WrongPlugin: If the cookie name differs from the one supplied
        in COOKIE_NAME.
    ValueError: If cookie_name or cookie_data are not set.
  """
  if cookie_name is None or cookie_data is None:
    raise ValueError(u'Cookie name or data are not set.')

  expected_name = self.COOKIE_NAME
  if cookie_name != expected_name:
    raise errors.WrongPlugin(
        u'Not the correct cookie plugin for: {0:s} [{1:s}]'.format(
            cookie_name, self.NAME))

  # Let the base class validate any remaining keyword arguments; it raises
  # on unhandled ones.
  super(CookiePlugin, self).Process(parser_mediator)

  self.GetEntries(parser_mediator, cookie_data=cookie_data, url=url)
def Process(self, parser_mediator, database=None, cache=None, **kwargs):
  """Checks whether this plugin handles the database and processes it.

  Args:
    parser_mediator: A parser mediator object (instance of ParserMediator).
    database: Optional ESE database object (instance of pyesedb.file).
        The default is None.
    cache: Optional cache object (instance of EseDbCache). The default
        is None.

  Raises:
    errors.WrongPlugin: If the database does not contain all the tables
        defined in the required_tables set.
    ValueError: If the database attribute is not valid.
  """
  if database is None:
    raise ValueError(u'Invalid database.')

  available_tables = frozenset(self._GetTableNames(database))
  missing_tables = self._required_tables.difference(available_tables)
  if missing_tables:
    raise errors.WrongPlugin(
        u'[{0:s}] required tables not found.'.format(self.NAME))

  # This will raise if unhandled keyword arguments are passed.
  super(EseDbPlugin, self).Process(parser_mediator)

  self.GetEntries(parser_mediator, database=database, cache=cache, **kwargs)
def Process(self, parser_mediator, cookie_name, cookie_data, url, **kwargs):
  """Checks whether this plugin handles the given cookie and processes it.

  Args:
    parser_mediator (ParserMediator): mediates interactions between parsers
        and other components, such as storage and dfvfs.
    cookie_name (str): the name of the cookie value.
    cookie_data (bytes): the cookie data, as a byte sequence.
    url (str): the full URL or path where the cookie was set.

  Raises:
    errors.WrongPlugin: If the cookie name differs from the one supplied
        in COOKIE_NAME.
    ValueError: If cookie_name or cookie_data are not set.
  """
  if cookie_name is None or cookie_data is None:
    raise ValueError('Cookie name or data are not set.')

  expected_name = self.COOKIE_NAME
  if cookie_name != expected_name:
    raise errors.WrongPlugin(
        'Not the correct cookie plugin for: {0:s} [{1:s}]'.format(
            cookie_name, self.NAME))

  # Let the base class validate any remaining keyword arguments; it raises
  # on unhandled ones.
  super(BaseCookiePlugin, self).Process(parser_mediator)

  self.GetEntries(parser_mediator, cookie_data=cookie_data, url=url)
def GetEntries(self, parser_mediator, cookie_data=None, url=None, **unused_kwargs): """Extracts event objects from the cookie. Args: parser_mediator: A parser mediator object (instance of ParserMediator). cookie_data: The cookie data, as a byte string. url: The full URL or path where the cookie got set. """ # Values has the structure of: # <domain hash>.<visitor ID>.<first visit>.<previous>.<last>.<# of # sessions> fields = cookie_data.split(u'.') # Check for a valid record. if len(fields) != 6: raise errors.WrongPlugin( u'Wrong number of fields. [{0:d} vs. 6]'.format(len(fields))) domain_hash, visitor_id, first_visit, previous, last, sessions = fields # TODO: catch int() throwing a ValueError. # TODO: Double check this time is stored in UTC and not local time. first_epoch = int(first_visit, 10) event_object = GoogleAnalyticsEvent(first_epoch, 'Analytics Creation Time', url, 'utma', self.COOKIE_NAME, domain_hash=domain_hash, visitor_id=visitor_id, sessions=int(sessions, 10)) parser_mediator.ProduceEvent(event_object) event_object = GoogleAnalyticsEvent(int(previous, 10), 'Analytics Previous Time', url, 'utma', self.COOKIE_NAME, domain_hash=domain_hash, visitor_id=visitor_id, sessions=int(sessions, 10)) parser_mediator.ProduceEvent(event_object) event_object = GoogleAnalyticsEvent( int(last, 10), eventdata.EventTimestamp.LAST_VISITED_TIME, url, 'utma', self.COOKIE_NAME, domain_hash=domain_hash, visitor_id=visitor_id, sessions=int(sessions, 10)) parser_mediator.ProduceEvent(event_object)
def CompileFilter(self, filter_expression):
  """Compiles the filter expression.

  The filter expression contains the name of a YAML file.

  Args:
    filter_expression: string that contains the filter expression.

  Raises:
    WrongPlugin: if the filter could not be compiled.
  """
  if not os.path.isfile(filter_expression):
    raise errors.WrongPlugin(
        (u'ObjectFilterList requires an YAML file to be passed on, '
         u'this filter string is not a file.'))

  yaml.add_constructor(
      u'!include', self._IncludeKeyword, Loader=yaml.loader.SafeLoader)
  results = None

  with open(filter_expression, 'rb') as file_object:
    try:
      results = yaml.safe_load(file_object)
    except (yaml.scanner.ScannerError, IOError) as exception:
      raise errors.WrongPlugin(
          u'Unable to parse YAML file with error: {0:s}.'.format(
              exception))

  self.filters = []
  # Use isinstance() instead of exact type comparison (type(x) is dict).
  if isinstance(results, dict):
    self._ParseEntry(results)
  elif isinstance(results, list):
    for result in results:
      if not isinstance(result, dict):
        # Use !s instead of :s since applying the "s" format spec to a
        # type object raises TypeError on Python 3, and report the type
        # of the offending entry rather than the type of the list.
        raise errors.WrongPlugin(
            u'Wrong format of YAML file, entry not a dict ({0!s})'.format(
                type(result)))
      self._ParseEntry(result)
  else:
    raise errors.WrongPlugin(
        u'Wrong format of YAML file, entry not a dict ({0!s})'.format(
            type(results)))

  self._filter_expression = filter_expression
def __init__(self, reg_type):
  """Initializes the plugin and validates the Windows Registry type.

  Args:
    reg_type: The detected Windows Registry type. This value should match
        the REG_TYPE value defined by the plugins.

  Raises:
    errors.WrongPlugin: if the detected Windows Registry type does not
        match the type this plugin supports.
  """
  super(WinRegCachePlugin, self).__init__()

  # The comparison is case insensitive on both sides.
  if reg_type.lower() != self.REG_TYPE.lower():
    raise errors.WrongPlugin(u'Not the correct Windows Registry type.')
def CompileFilter(self, filter_expression):
  """Compiles the filter expression.

  The filter expression contains an object filter expression extended
  with selective field selection.

  Args:
    filter_expression: string that contains the filter expression.

  Raises:
    WrongPlugin: if the filter could not be compiled.
  """
  token_lexer = SelectiveLexer(filter_expression)

  # The first token selects the fields, the second the optional limit
  # and separator; each failed tokenization sets the error flag.
  token_lexer.NextToken()
  if token_lexer.error:
    raise errors.WrongPlugin('Malformed filter string.')

  token_lexer.NextToken()
  if token_lexer.error:
    raise errors.WrongPlugin('No fields defined.')

  while token_lexer.state != self._STATE_END:
    token_lexer.NextToken()
    if token_lexer.error:
      raise errors.WrongPlugin(
          'No filter defined for DynamicFilter.')

  if token_lexer.state != self._STATE_END:
    raise errors.WrongPlugin(
        'Malformed DynamicFilter, end state not reached.')

  self._fields = token_lexer.fields
  self._limit = token_lexer.limit
  self._separator = '{0:s}'.format(token_lexer.separator)

  if token_lexer.lex_filter:
    super(DynamicFilter, self).CompileFilter(token_lexer.lex_filter)
  else:
    self._matcher = None
  self._filter_expression = filter_expression
def GetEntries(self, parser_mediator, cookie_data=None, url=None, **unused_kwargs): """Extracts event objects from the cookie. Args: parser_mediator: A parser mediator object (instance of ParserMediator). cookie_data: The cookie data, as a byte string. url: The full URL or path where the cookie got set. """ # The structure of the field: # <domain hash>.<last time>.<sessions>.<sources>.<variables> fields = cookie_data.split('.') if len(fields) > 5: variables = u'.'.join(fields[4:]) fields = fields[0:4] fields.append(variables) if len(fields) != 5: raise errors.WrongPlugin( u'Wrong number of fields. [{0:d} vs. 5]'.format(len(fields))) domain_hash, last, sessions, sources, variables = fields extra_variables = variables.split(u'|') kwargs = {} for variable in extra_variables: key, _, value = variable.partition(u'=') try: value_line = unicode(urllib.unquote(str(value)), 'utf-8') except UnicodeDecodeError: value_line = repr(value) kwargs[key] = value_line event_object = GoogleAnalyticsEvent( int(last, 10), eventdata.EventTimestamp.LAST_VISITED_TIME, url, 'utmz', self.COOKIE_NAME, domain_hash=domain_hash, sessions=int(sessions, 10), sources=int(sources, 10), **kwargs) parser_mediator.ProduceEvent(event_object)
def _ParseEntry(self, entry):
  """Parses a single filter entry.

  Args:
    entry: YAML string that defines a single object filter entry.

  Raises:
    WrongPlugin: if the entry cannot be parsed.
  """
  # Every key in the entry names a rule; the value is its metadata, which
  # must carry a 'filter' statement.
  for rule_name, rule_metadata in entry.items():
    if u'filter' not in rule_metadata:
      raise errors.WrongPlugin(
          u'Entry inside {0:s} does not contain a filter statement.'.format(
              rule_name))

    filter_definition = rule_metadata.get(u'filter')
    filter_matcher = self._GetMatcher(filter_definition)
    if not filter_matcher:
      raise errors.WrongPlugin(
          u'Filter entry [{0:s}] malformed for rule: <{1:s}>'.format(
              filter_definition, rule_name))

    self.filters.append((rule_name, filter_matcher, rule_metadata))
def CompileFilter(self, unused_filter_string):
  """Verifies the filter string and prepares the filter for later usage.

  Subclasses override this method to check that the filter string matches
  the definition of the class and, if necessary, compile or prepare the
  filter so it can start matching against passed in EventObjects. This
  base implementation rejects every filter string.

  Args:
    unused_filter_string: A string passed in that should be recognized by
        the filter class.

  Raises:
    errors.WrongPlugin: If this filter string does not match the filter
        class.
  """
  raise errors.WrongPlugin('Not the correct filter for this string.')
def CompileFilter(self, filter_expression):
  """Compiles the filter expression.

  The filter expression contains an object filter expression.

  Args:
    filter_expression: string that contains the filter expression.

  Raises:
    WrongPlugin: if the filter could not be compiled.
  """
  compiled_matcher = self._GetMatcher(filter_expression)
  if not compiled_matcher:
    raise errors.WrongPlugin(u'Malformed filter expression.')

  self._matcher = compiled_matcher
  self._filter_expression = filter_expression
def GetEntries(self, parser_mediator, cookie_data=None, url=None,
               **unused_kwargs):
  """Extracts event objects from the cookie.

  Args:
    parser_mediator: A parser mediator object (instance of ParserMediator).
    cookie_data: The cookie data, as a byte string.
    url: The full URL or path where the cookie got set.

  Raises:
    errors.WrongPlugin: If the cookie data does not split into exactly
        4 fields.
  """
  fields = cookie_data.split(u'.')
  number_of_fields = len(fields)

  if number_of_fields != 4:
    raise errors.WrongPlugin(
        u'Wrong number of fields. [{0:d} vs. 4]'.format(
            number_of_fields))

  domain_hash = fields[0]

  try:
    number_of_pages_viewed = int(fields[1], 10)
  except ValueError:
    number_of_pages_viewed = 0

  try:
    last_visit_posix_time = int(fields[3], 10)
  except ValueError:
    # Use None as the failure sentinel so the check below takes the
    # "not a time" branch. The previous value of 0 made that branch
    # unreachable and dated malformed cookies to the POSIX epoch.
    last_visit_posix_time = None

  if last_visit_posix_time is not None:
    timestamp_description = eventdata.EventTimestamp.LAST_VISITED_TIME
  else:
    last_visit_posix_time = timelib.Timestamp.NONE_TIMESTAMP
    timestamp_description = eventdata.EventTimestamp.NOT_A_TIME

  event_object = GoogleAnalyticsEvent(
      last_visit_posix_time, timestamp_description, u'utmb', url,
      domain_hash=domain_hash,
      number_of_pages_viewed=number_of_pages_viewed)
  parser_mediator.ProduceEvent(event_object)
def GetEntries(self, cookie_data, **unused_kwargs):
  """Yield event objects extracted from the cookie."""
  # Values has the structure of:
  # <domain hash>.<pages viewed>.10.<last time>
  fields = cookie_data.split('.')
  number_of_fields = len(fields)

  # Check for a valid record.
  if number_of_fields != 4:
    raise errors.WrongPlugin(
        u'Wrong number of fields. [{} vs. 4]'.format(number_of_fields))

  domain_hash, pages_viewed, _, last = fields
  yield GoogleAnalyticsEvent(
      int(last, 10), eventdata.EventTimestamp.LAST_VISITED_TIME,
      self._data_type, domain_hash=domain_hash,
      pages_viewed=int(pages_viewed, 10))
def Process(self, root_item=None, item_names=None, **kwargs):
  """Determine if this is the right plugin for this OLECF file.

  Compares the sub items found in the root of the OLECF file against the
  items this plugin requires. When the required items form a subset of
  the discovered items this plugin is considered the correct plugin and
  a generator yielding event objects is returned.

  Args:
    root_item: The root item of the OLECF file.
    item_names: A list of all items discovered in the root.

  Returns:
    A generator that yields event objects.

  Raises:
    errors.WrongPlugin: If the set of required items is not a subset of the
        available items.
    ValueError: If the root_item or items are not set.
  """
  if root_item is None or item_names is None:
    raise ValueError(u'Root item or items are not set.')

  if not self.REQUIRED_ITEMS.issubset(item_names):
    raise errors.WrongPlugin(u'Not the correct items for: {}'.format(
        self.plugin_name))

  super(OlecfPlugin, self).Process(**kwargs)

  # Resolve every required item name, silently skipping names that have
  # no corresponding sub item.
  sub_items = (
      root_item.get_sub_item_by_name(name) for name in self.REQUIRED_ITEMS)
  items = [sub_item for sub_item in sub_items if sub_item]

  return self.GetEntries(root_item=root_item, items=items)
def GetEntries(self, cookie_data, **unused_kwargs): """Process the cookie.""" # The structure of the field: # <domain hash>.<last time>.<sessions>.<sources>.<variables> fields = cookie_data.split('.') if len(fields) > 5: variables = '.'.join(fields[4:]) fields = fields[0:4] fields.append(variables) if len(fields) != 5: raise errors.WrongPlugin( u'Wrong number of fields. [{} vs. 5]'.format(len(fields))) domain_hash, last, sessions, sources, variables = fields extra_variables = variables.split('|') extra_variables_translated = [] for variable in extra_variables: key, _, value = variable.partition('=') translation = self.GA_UTMZ_TRANSLATION.get(key, key) try: value_line = unicode(urllib.unquote(str(value)), 'utf-8') except UnicodeDecodeError: value_line = repr(value) extra_variables_translated.append(u'{} = {}'.format( translation, value_line)) yield GoogleAnalyticsEvent(int(last, 10), eventdata.EventTimestamp.LAST_VISITED_TIME, self._data_type, domain_hash=domain_hash, sessions=int(sessions, 10), sources=int(sources, 10), extra=extra_variables_translated)
def GetEntries(self, cookie_data, **unused_kwargs):
  """Yield event objects extracted from the cookie."""
  # Values has the structure of:
  # <domain hash>.<visitor ID>.<first visit>.<previous>.<last>.<# of
  # sessions>
  fields = cookie_data.split('.')

  # Check for a valid record.
  if len(fields) != 6:
    raise errors.WrongPlugin(
        u'Wrong number of fields. [{} vs. 6]'.format(len(fields)))

  domain_hash, visitor_id, first_visit, previous, last, sessions = fields

  # All three events share the same attribute values.
  shared_attributes = {
      'domain_hash': domain_hash,
      'visitor_id': visitor_id,
      'sessions': int(sessions, 10)}

  # TODO: Double check this time is stored in UTC and not local time.
  first_epoch = int(first_visit, 10)
  yield GoogleAnalyticsEvent(
      first_epoch, 'Analytics Creation Time', self._data_type,
      **shared_attributes)

  yield GoogleAnalyticsEvent(
      int(previous, 10), 'Analytics Previous Time', self._data_type,
      **shared_attributes)

  yield GoogleAnalyticsEvent(
      int(last, 10), eventdata.EventTimestamp.LAST_VISITED_TIME,
      self._data_type, **shared_attributes)
def GetEntries(self, parser_mediator, cookie_data=None, url=None,
               **unused_kwargs):
  """Extracts event objects from the cookie.

  Produces three events from the utma cookie data: creation time, previous
  visit time and last visit time.

  Args:
    parser_mediator: A parser mediator object (instance of ParserMediator).
    cookie_data: The cookie data, as a byte string.
    url: The full URL or path where the cookie got set.

  Raises:
    errors.WrongPlugin: If the cookie data does not split into exactly
        6 fields.
  """
  def _ParseIntegerValue(string_value):
    # Shared "base-10 integer or 0 on error" conversion, factored out of
    # four identical try/except blocks.
    try:
      return int(string_value, 10)
    except ValueError:
      return 0

  fields = cookie_data.split(u'.')

  # Check for a valid record.
  if len(fields) != 6:
    raise errors.WrongPlugin(
        u'Wrong number of fields. [{0:d} vs. 6]'.format(len(fields)))

  domain_hash, visitor_id, first_visit, previous, last, sessions = fields

  # TODO: Double check this time is stored in UTC and not local time.
  first_epoch = _ParseIntegerValue(first_visit)
  sessions = _ParseIntegerValue(sessions)
  previous = _ParseIntegerValue(previous)
  last = _ParseIntegerValue(last)

  event_object = GoogleAnalyticsEvent(
      first_epoch, u'Analytics Creation Time', url, u'utma',
      domain_hash=domain_hash, visitor_id=visitor_id, sessions=sessions)
  parser_mediator.ProduceEvent(event_object)

  event_object = GoogleAnalyticsEvent(
      previous, u'Analytics Previous Time', url, u'utma',
      domain_hash=domain_hash, visitor_id=visitor_id, sessions=sessions)
  parser_mediator.ProduceEvent(event_object)

  event_object = GoogleAnalyticsEvent(
      last, eventdata.EventTimestamp.LAST_VISITED_TIME, url, u'utma',
      domain_hash=domain_hash, visitor_id=visitor_id, sessions=sessions)
  parser_mediator.ProduceEvent(event_object)
def GetEntries(self, parser_mediator, cookie_data=None, url=None,
               **unused_kwargs):
  """Extracts event objects from the cookie.

  Args:
    parser_mediator: A parser mediator object (instance of ParserMediator).
    cookie_data: The cookie data, as a byte string.
    url: The full URL or path where the cookie got set.

  Raises:
    errors.WrongPlugin: If the cookie data does not reduce to exactly
        5 fields.
  """
  fields = cookie_data.split(u'.')

  # The variables field may itself contain dots, so everything past the
  # fourth separator is re-joined into a single variables field.
  if len(fields) > 5:
    variables = u'.'.join(fields[4:])
    fields = fields[0:4]
    fields.append(variables)

  if len(fields) != 5:
    raise errors.WrongPlugin(
        u'Wrong number of fields. [{0:d} vs. 5]'.format(len(fields)))

  domain_hash, last, sessions, sources, variables = fields
  # Variables are pipe separated key=value pairs.
  extra_variables = variables.split(u'|')

  kwargs = {}
  for variable in extra_variables:
    key, _, value = variable.partition(u'=')

    # Cookies can have a variety of different encodings, usually ASCII or
    # UTF-8, and values may additionally be URL encoded. urllib only correctly
    # url-decodes ASCII strings, so we'll convert our string to ASCII first.
    try:
      ascii_value = value.encode(u'ascii')
    except UnicodeEncodeError:
      # Drop the non-ASCII characters and report the data loss as a parse
      # error instead of failing the whole cookie.
      ascii_value = value.encode(u'ascii', errors=u'ignore')
      parser_mediator.ProduceParseError(
          u'Cookie contains non 7-bit ASCII characters. The characters have '
          u'been removed')

    utf_stream = urllib.unquote(ascii_value)

    try:
      value_line = utf_stream.decode(u'utf-8')
    except UnicodeDecodeError:
      # Replace undecodable bytes and report the substitution as a parse
      # error instead of failing the whole cookie.
      value_line = utf_stream.decode(u'utf-8', errors=u'replace')
      parser_mediator.ProduceParseError(
          u'Cookie value did not decode to value unicode string. Non UTF-8 '
          u'characters have been replaced.')

    kwargs[key] = value_line

  # Malformed numeric fields default to 0.
  try:
    last = int(last, 10)
  except ValueError:
    last = 0

  try:
    sessions = int(sessions, 10)
  except ValueError:
    sessions = 0

  try:
    sources = int(sources, 10)
  except ValueError:
    sources = 0

  event_object = GoogleAnalyticsEvent(
      last, eventdata.EventTimestamp.LAST_VISITED_TIME, url, u'utmz',
      domain_hash=domain_hash, sessions=sessions, sources=sources,
      **kwargs)
  parser_mediator.ProduceEvent(event_object)