def validate_bl_headers(self, message):
    """
    Verify that `message` carries every mandatory blacklist header
    and that its expiry date is parseable.

    Raises:
        n6QueueProcessingException: if any required header is missing
        or the 'expires' value is not a valid ISO date+time.
    """
    required_headers = ('_bl-series-id', '_bl-series-total',
                        '_bl-series-no', '_bl-time', 'expires')
    if any(header not in message for header in required_headers):
        raise n6QueueProcessingException(
            "Invalid message for a black list")
    try:
        parse_iso_datetime_to_utc(message["expires"])
    except ValueError:
        raise n6QueueProcessingException("Invalid expiry date")
def process_event(self, data):
    """
    Aggregate one high-frequency event into this source's state.

    Returns:
        True if a (new or suppressed-and-restarted) group event should
        be generated/published; False if the event was absorbed into an
        existing group or into the suppressed-events buffer.

    Raises:
        n6QueueProcessingException: for an event older than
        `self.time - self.time_tolerance` that cannot be attached
        to an existing group.
    """
    event_time = parse_iso_datetime_to_utc(data['time'])
    event = self.groups.get(data['_group'])
    if self.time is None:
        # first event ever seen by this source-data object
        self.time = event_time
    if event_time + self.time_tolerance < self.time:
        # event is older than the tolerance window
        if event is None or event.first > event_time:
            LOGGER.error('Event out of order. Ignoring. Data: %s', data)
            raise n6QueueProcessingException('Event out of order.')
        else:
            LOGGER.info('Event out of order, but not older than group\'s first event, '
                        'so it will be added to existing aggregate group. Data: %s', data)
            event.until = max(event.until, event_time)
            event.count += 1  # XXX: see ticket #6243
            return False
    if event is None:
        if event_time < self.time:
            # unordered event, self.buffer may contain suppressed event
            LOGGER.debug("Unordered event of the '%s' group, '%s' source within time "
                         "tolerance. Check and update buffer.",
                         data['_group'], data['source'])
            buffered_event = self.buffer.get(data['_group'])
            if buffered_event is not None:
                buffered_event.count += 1  # XXX: see ticket #6243
                self.buffer[data['_group']] = buffered_event
            return False
        # Event not seen before - add new event to group
        LOGGER.debug("A new group '%s' for '%s' source began to be aggregated, "
                     "first event is being generated.",
                     data['_group'], data['source'])
        self.groups[data['_group']] = HiFreqEventData(data)  # XXX: see ticket #6243
        self.update_time(parse_iso_datetime_to_utc(data['time']))
        return True
    if (event_time > event.until + datetime.timedelta(hours=AGGREGATE_WAIT)
            or event_time.date() > self.time.date()):
        LOGGER.debug("A suppressed event is generated for the '%s' group of "
                     "'%s' source due to passing of %s hours between events.",
                     data['_group'], data['source'], AGGREGATE_WAIT)
        # 24 hour aggregation or AGGREGATE_WAIT time passed between events in group
        del self.groups[data['_group']]
        self.groups[data['_group']] = HiFreqEventData(data)  # XXX: see ticket #6243
        self.buffer[data['_group']] = event
        self.update_time(parse_iso_datetime_to_utc(data['time']))
        return True
    # Event for existing group and still aggregating
    LOGGER.debug("Event is being aggregated in the '%s' group of the '%s' source.",
                 data['_group'], data['source'])
    event.count += 1  # XXX: see ticket #6243
    if event_time > event.until:
        event.until = event_time
    # delete + re-insert: presumably moves the group to the end of an
    # ordered mapping (most-recently-active last) -- TODO confirm
    del self.groups[data['_group']]
    self.groups[data['_group']] = event
    self.update_time(parse_iso_datetime_to_utc(data['time']))
    return False
def process_row_fields(self, data, parsed, _empty, _next_date, fqdn, *fields):
    """Fill `parsed` with time, expiry (creation time + 2 days) and FQDN."""
    timestamp = data['properties.timestamp']
    parsed['time'] = timestamp
    # entries from this source are considered valid for two days
    expiry = parse_iso_datetime_to_utc(timestamp) + datetime.timedelta(days=2)
    parsed['expires'] = expiry
    parsed['fqdn'] = fqdn
def process_row_fields(self, data, parsed, fqdn):
    """Fill `parsed` with stripped FQDN, time and expiry; return `parsed`."""
    timestamp = data['properties.timestamp']
    parsed['fqdn'] = fqdn.strip()
    parsed['time'] = timestamp
    parsed['expires'] = parse_iso_datetime_to_utc(timestamp) + self._time_delta
    return parsed
def get_dt_header(self, header_key):
    """
    A helper method to retrieve a response header as a date+time.

    Args/kwargs:
        `header_key`: The name of the HTTP response header.

    Returns:
        `None` or UTC date+time as a `datetime.datetime` instance
        (a naive one, i.e., without explicit timezone information).

    Example usage:
        with RequestPerformer('GET', 'http://example.com/FOO') as perf:
            foo_last_modified = perf.get_dt_header('Last-Modified')
            if foo_last_modified is None:
                print 'I have no idea when FOO was modified.'
            else:
                print 'FOO modification date+time:', foo_last_modified.isoformat()
    """
    raw_value = (self.response.headers.get(header_key) or '').strip()
    if raw_value:
        # first try each of the known HTTP date+time formats...
        for dt_format in self._HTTP_DATETIME_FORMATS:
            try:
                return datetime.datetime.strptime(raw_value, dt_format)
            except ValueError:
                pass
        # ...then fall back to ISO-8601 parsing
        try:
            return parse_iso_datetime_to_utc(raw_value)
        except ValueError:
            pass
    # missing, empty or unparseable header
    return None
def _get_expires_on(cls, file_path):
    """
    Get the end ('notAfter') date+time of the certificate in `file_path`.

    Returns:
        A `datetime.datetime` (UTC) parsed from the ISO-formatted
        output of the shell pipeline below (`openssl x509 -enddate`
        reformatted by `date -u`).

    NOTE(review): `file_path` is interpolated into a shell command;
    assumed to be a trusted, shell-safe path -- confirm at call sites.
    """
    [expires_on_str] = cls._bash(
        r"""LC_ALL=C date -u -d "`openssl x509 -noout """
        r"""-in "{0}" -enddate """
        r"""| sed -r 's/\w+=(.*)/\1/'`" '+%Y-%m-%dT%H:%M:%S'""".format(
            file_path)).splitlines()
    return parse_iso_datetime_to_utc(expires_on_str)
def _get_valid_from(cls, file_path):
    """
    Get the start ('notBefore') date+time of the certificate in `file_path`.

    Returns:
        A `datetime.datetime` (UTC) parsed from the ISO-formatted
        output of the shell pipeline below (`openssl x509 -startdate`
        reformatted by `date -u`).

    NOTE(review): `file_path` is interpolated into a shell command;
    assumed to be a trusted, shell-safe path -- confirm at call sites.
    """
    [valid_from_str] = cls._bash(
        r"""LC_ALL=C date -u -d "`openssl x509 -noout """
        r"""-in "{0}" -startdate """
        r"""| sed -r 's/\w+=(.*)/\1/'`" '+%Y-%m-%dT%H:%M:%S'""".format(
            file_path)).splitlines()
    return parse_iso_datetime_to_utc(valid_from_str)
def process_row_fields(self, data, parsed, ip, _asn, _next_date, fqdn,
                       _type, _origin, _dateverified, *fields):
    """Fill `parsed` with time, expiry (creation + 2 days), FQDN and IP."""
    timestamp = data['properties.timestamp']
    parsed['time'] = timestamp
    two_days = datetime.timedelta(days=2)
    parsed['expires'] = parse_iso_datetime_to_utc(timestamp) + two_days
    parsed['fqdn'] = fqdn
    parsed['address'] = {'ip': ip}
def process_new_message(self, data):
    """Processes a message and validates agains db to detect new/change/update.

    Adds new entry to db if necessary (new) or updates entry
    (change/update) and stores flag in db for processed event.

    Returns:
        Whatever `SourceData.process_event()` returns for `data`
        (a routing decision for the processed event).
    """
    source_data = self.comp_data.get_or_create_sourcedata(data['source'])
    result = source_data.process_event(data)
    # advance the source's clock to this message's blacklist time
    source_data.update_time(parse_iso_datetime_to_utc(data['_bl-time']))
    return result
def __init__(self, payload):
    """Initialize blacklist-event state from the event `payload` dict."""
    self.id = payload.get("id")
    self.source = payload.get("source")
    self.url = payload.get("url")
    self.fqdn = payload.get("fqdn")
    addresses = payload.get("address")
    # flatten the address dicts into a plain list of IP strings
    self.ip = ([str(addr["ip"]) for addr in addresses]
               if addresses is not None
               else [])
    self.flag = payload.get("flag")
    self.expires = parse_iso_datetime_to_utc(payload.get("expires"))
    # keep a private copy so later payload mutations do not leak in
    self.payload = payload.copy()
def process_event(self, data):
    """
    Process one blacklist event against the current blacklist state.

    Returns:
        A `(routing_state, payload)` pair where `routing_state` is
        'bl-new', 'bl-change', 'bl-update' or None (no relevant change).

    Raises:
        n6QueueProcessingException: if the event belongs to a blacklist
        older than the last one processed.
    """
    event_time = parse_iso_datetime_to_utc(data['_bl-time'])
    if self.time is None:
        # first event ever seen by this object
        self.time = event_time
    if event_time < self.time:
        LOGGER.error('Event out of order. Ignoring.\nData: %s', data)
        raise n6QueueProcessingException(
            'Event belongs to blacklist'
            ' older than the last one processed.')
    event_key = self.get_event_key(data)
    event = self.blacklist.get(event_key)
    if event is None:
        # new bl event
        new_event = BlackListData(data)
        new_event.flag = data.get("_bl-series-id")
        self.blacklist[event_key] = new_event
        return 'bl-new', new_event.payload
    else:
        # existing
        ips_old = event.ip
        ips_new = [x["ip"] for x in data.get("address")
                   ] if data.get("address") is not None else []
        if self._are_ips_different(ips_old, ips_new):
            # the IP set changed -> replace the stored event
            data["replaces"] = event.id
            new_event = BlackListData(data)
            new_event.flag = data.get("_bl-series-id")
            self.blacklist[event_key] = new_event
            return "bl-change", new_event.payload
        elif parse_iso_datetime_to_utc(
                data.get("expires")) != event.expires:
            # only the expiry date changed -> update in place
            event.expires = parse_iso_datetime_to_utc(data.get("expires"))
            event.flag = data.get("_bl-series-id")
            event.update_payload({"expires": data.get("expires")})
            self.blacklist[event_key] = event
            return "bl-update", event.payload
        else:
            # nothing relevant changed; just refresh the series flag
            event.flag = data.get("_bl-series-id")
            self.blacklist[event_key] = event
            return None, event.payload
def get_bl_current_time_from_data(self, data, parsed):
    """
    Extract the blacklist's "current time" from the raw data.

    Returns a `datetime.datetime` (parsed with
    `self.bl_current_time_format` if set, otherwise as ISO-8601),
    or None when no regex is configured or nothing matches.
    """
    if not self.bl_current_time_regex:
        return None
    match = self.bl_current_time_regex.search(data['raw'])
    if match:
        matched_time = match.group(self.bl_current_time_regex_group)
        if matched_time:
            if self.bl_current_time_format:
                return datetime.strptime(matched_time,
                                         self.bl_current_time_format)
            return parse_iso_datetime_to_utc(matched_time)
    return None
def process_row_fields(self, data, parsed, _empty, _next_date, fqdn,
                       malware_type, *fields):
    """
    Fill `parsed` with time, expiry (creation + 2 days), FQDN,
    category ('phish' for 'phishing' rows, 'malurl' otherwise) and name.
    """
    timestamp = data['properties.timestamp']
    parsed['time'] = timestamp
    parsed['expires'] = (parse_iso_datetime_to_utc(timestamp)
                         + datetime.timedelta(days=2))
    parsed['fqdn'] = fqdn
    parsed['category'] = 'phish' if malware_type == 'phishing' else 'malurl'
    parsed['name'] = malware_type
def get_bl_current_time_from_data(self, data, parsed):
    """
    Extract the blacklist's "current time" from the raw (bytes) data.

    Returns a `datetime.datetime` (parsed with
    `self.bl_current_time_format` if set, otherwise as ISO-8601),
    or None when no regex is configured or nothing matches.
    """
    if not self.bl_current_time_regex:
        return None
    # raw payload is bytes; decode leniently before searching
    decoded_raw = data['raw'].decode('utf-8', 'surrogateescape')
    match = self.bl_current_time_regex.search(decoded_raw)
    if match:
        matched_time = match.group(self.bl_current_time_regex_group)
        if matched_time:
            if self.bl_current_time_format:
                return datetime.strptime(matched_time,
                                         self.bl_current_time_format)
            return parse_iso_datetime_to_utc(matched_time)
    return None
def __init__(self, **kwargs):
    """
    Normalize the given **kwargs and set the declared n6 columns.

    `time` is mandatory; `expires`/`modified` are parsed to UTC when
    present; `client` and `type` are silently ignored; any leftover
    kwargs are only logged as a warning.
    """
    if kwargs.get('ip') is None:
        # adding the "no IP" placeholder ('0.0.0.0') which should be
        # transformed into 0 in the database (because `ip` cannot be
        # NULL in our SQL db; and apparently, for unknown reason,
        # XXX: <- check whether that's true...
        # IPAddress.process_bind_param() is not called by the
        # SQLAlchemy machinery if the value of `ip` is just None)
        kwargs['ip'] = IPAddress.NONE_STR
    kwargs['time'] = parse_iso_datetime_to_utc(kwargs["time"])
    kwargs['expires'] = (parse_iso_datetime_to_utc(kwargs.get("expires"))
                         if kwargs.get("expires") is not None
                         else None)
    kwargs['modified'] = (parse_iso_datetime_to_utc(kwargs.get("modified"))
                          if kwargs.get("modified") is not None
                          else None)
    # every declared column is set (missing ones default to None);
    # consumed kwargs are popped so only leftovers remain afterwards
    for name in self._n6columns:
        setattr(self, name, kwargs.pop(name, None))
    ### XXX: the 'until' field is not converted here to utc datetime!
    ### (see ticket #3113)
    kwargs.pop('client', None)   # here we just ignore this arg if present
    kwargs.pop('type', None)     # here we just ignore this arg if present
    if kwargs:
        LOGGER.warning(
            'n6NormalizedData.__init__() got unexpected **kwargs: %a', kwargs)
def suppressed_update(self):
    """
    Aggregated event update (change fields `until` and `count` to the
    values of the suppressed event).

    Updates the matching db row in place; if no row matches, inserts
    the event (and client) records as new ones instead.
    """
    LOGGER.debug('* suppressed_update() %a', self.record_dict)
    # add event records from RecordDict
    for event_record in self.record_dict.iter_db_items():
        self.records['event'].append(event_record)
    self.json_to_record(self.records['event'])
    id_event = self.records['event'][0]["id"]
    until = self.records['event'][0]["until"]
    count = self.records['event'][0]["count"]

    # optimization: we can limit time => searching within one partition, not all;
    # it seems that mysql (and/or sqlalchemy?) truncates times to seconds,
    # we are also not 100% sure if other time data micro-distortions are not done
    # -- that's why here we use a 1-second-range instead of an exact value
    first_time_min = parse_iso_datetime_to_utc(
        self.record_dict["_first_time"]).replace(microsecond=0)
    first_time_max = first_time_min + datetime.timedelta(days=0, seconds=1)
    with transact:
        # try the in-place update first...
        rec_count = (self.session_db.query(n6NormalizedData).filter(
            n6NormalizedData.time >= first_time_min,
            n6NormalizedData.time <= first_time_max,
            n6NormalizedData.id == id_event).update({
                'until': until,
                'count': count
            }))
        if rec_count:
            LOGGER.debug("records with the same id %a exist: %a",
                         id_event, rec_count)
        else:
            # ...no matching row: fall back to inserting everything anew
            items = []
            for record in self.records['event']:
                event = n6NormalizedData(**record)
                items.append(event)
            for record in self.records['client']:
                client = n6ClientToEvent(**record)
                items.append(client)
            LOGGER.warning(
                "suppressed_update, records with id %a DO NOT EXIST!",
                id_event)
            LOGGER.debug("insert new events,,::count:: %a", len(items))
            self.insert_new_event(items, with_transact=False)
class TestDnsBhMalwareDomainsCom201412Parser(ParserTestMixIn, unittest.TestCase):

    # parser-under-test wiring expected by ParserTestMixIn
    RECORD_DICT_CLASS = BLRecordDict
    PARSER_SOURCE = 'dns-bh.malwaredomainscom'
    PARSER_CLASS = DnsBhMalwareDomainsCom201412Parser
    PARSER_BASE_CLASS = BlackListTabDataParser
    PARSER_CONSTANT_ITEMS = {
        'restriction': 'public',
        'confidence': 'low',
        'category': 'other',
    }

    # expected `expires` value: message creation time + 2 days
    message_expires = str((parse_iso_datetime_to_utc(ParserTestMixIn.message_created) +
                           datetime.timedelta(days=2)))

    def cases(self):
        # '##'-prefixed comment lines must be skipped by the parser;
        # each tab-separated data row yields one FQDN-only event
        yield ("""## If you do not accept these term, then do not use this information.
## For noncommercial use only. Using this information indicates you agree to be bound by these terms.
## nextvalidation domain type original_reference-why_it_was_listed dateverified
## notice notice duplication is not permitted
\t20130601\tair-komplex.hu\tattackpage\tsafebrowsing.clients.google.com\t20110506\t20101201
\t20130601\tmmtrf.com\tattackpage\tsafebrowsing.clients.google.com\t20110630\t20101031
\t20130601\tmtxa.net\tattackpage\tsafebrowsing.clients.google.com\t20110630\t20101031
"""
               ,
               [
                   dict(
                       self.get_bl_items(1, 3),
                       fqdn="air-komplex.hu",
                       time=self.message_created,
                       expires=self.message_expires,
                   ),
                   dict(
                       self.get_bl_items(2, 3),
                       fqdn="mmtrf.com",
                       time=self.message_created,
                       expires=self.message_expires,
                   ),
                   dict(
                       self.get_bl_items(3, 3),
                       fqdn="mtxa.net",
                       time=self.message_created,
                       expires=self.message_expires,
                   ),
               ]
               )
class TestBadipsServerExploitListParser(ParserTestMixIn, unittest.TestCase):

    # parser-under-test wiring expected by ParserTestMixIn
    PARSER_BASE_CLASS = BlackListTabDataParser
    PARSER_CLASS = BadipsServerExploitListParser
    PARSER_CONSTANT_ITEMS = {
        'restriction': 'public',
        'confidence': 'low',
        'category': 'server-exploit',
    }
    RECORD_DICT_CLASS = BLRecordDict
    PARSER_SOURCE = 'badips-com.server-exploit-list'

    # shared case generator reused across the badips test classes
    cases = _cases_for_badips_server_exploit_list_parser

    # expected `expires` value: message creation time + 2 days
    message_expires = str(
        parse_iso_datetime_to_utc(ParserTestMixIn.message_created) +
        timedelta(days=2))
class TestGreenSnowParser(ParserTestMixIn, unittest.TestCase):

    # parser-under-test wiring expected by ParserTestMixIn
    RECORD_DICT_CLASS = BLRecordDict
    PARSER_SOURCE = 'greensnow-co.list-txt'
    PARSER_CLASS = GreenSnowParser
    PARSER_BASE_CLASS = BlackListTabDataParser
    PARSER_CONSTANT_ITEMS = {
        'restriction': 'public',
        'confidence': 'low',
        'category': 'other',
    }

    # expected `expires` value: message creation time + 2 days
    MESSAGE_EXPIRES = str(
        parse_iso_datetime_to_utc(ParserTestMixIn.message_created) +
        datetime.timedelta(days=2))

    def cases(self):
        # one IP address per input line -> one address-only event each
        yield ('1.1.1.1\n'
               '2.2.2.2\n'
               '3.3.3.3\n',
               [
                   dict(
                       self.get_bl_items(1, 3),
                       address=[{
                           'ip': '1.1.1.1'
                       }],
                       time=self.message_created,
                       expires=self.MESSAGE_EXPIRES,
                   ),
                   dict(
                       self.get_bl_items(2, 3),
                       address=[{
                           'ip': '2.2.2.2'
                       }],
                       time=self.message_created,
                       expires=self.MESSAGE_EXPIRES,
                   ),
                   dict(
                       self.get_bl_items(3, 3),
                       address=[{
                           'ip': '3.3.3.3'
                       }],
                       time=self.message_created,
                       expires=self.MESSAGE_EXPIRES,
                   ),
               ])
class TestDnsBhMalwareDomainsComParser(ParserTestMixIn, unittest.TestCase):

    # parser-under-test wiring expected by ParserTestMixIn
    RECORD_DICT_CLASS = BLRecordDict
    PARSER_SOURCE = 'dns-bh.malwaredomainscom'
    PARSER_CLASS = DnsBhMalwareDomainsComParser
    PARSER_BASE_CLASS = BlackListTabDataParser
    PARSER_CONSTANT_ITEMS = {
        'restriction': 'public',
        'confidence': 'low',
        'category': 'other',
    }

    # expected `expires` value: message creation time + 2 days
    message_expires = str((parse_iso_datetime_to_utc(ParserTestMixIn.message_created) +
                           datetime.timedelta(days=2)))

    def cases(self):
        # each tab-separated row yields one event with both FQDN and IP
        yield ("""84.2.35.134\t15545\t20130601\tair-komplex.hu\tattackpage\tsafebrowsing.clients.google.com\t20110506\t20101201\t
141.8.226.5\t\t20130601\tmmtrf.com\tattackpage\tsafebrowsing.clients.google.com\t20110630\t20101031\t
176.74.176.167\t13768\t20130601\tmtxa.net\tattackpage\tsafebrowsing.clients.google.com\t20110630\t20101031\t
"""
               ,
               [
                   dict(
                       self.get_bl_items(1, 3),
                       fqdn="air-komplex.hu",
                       address=[{'ip':'84.2.35.134'}],
                       time=self.message_created,
                       expires=self.message_expires,
                   ),
                   dict(
                       self.get_bl_items(2, 3),
                       fqdn="mmtrf.com",
                       address=[{'ip':'141.8.226.5'}],
                       time=self.message_created,
                       expires=self.message_expires,
                   ),
                   dict(
                       self.get_bl_items(3, 3),
                       fqdn="mtxa.net",
                       address=[{'ip':'176.74.176.167'}],
                       time=self.message_created,
                       expires=self.message_expires,
                   ),
               ]
               )
class TestSpam404Parser(ParserTestMixIn, unittest.TestCase):

    # parser-under-test wiring expected by ParserTestMixIn
    RECORD_DICT_CLASS = BLRecordDict
    PARSER_SOURCE = 'spam404-com.scam-list'
    PARSER_CLASS = Spam404Parser
    PARSER_BASE_CLASS = BlackListTabDataParser
    PARSER_CONSTANT_ITEMS = {
        'restriction': 'public',
        'confidence': 'low',
        'category': 'scam',
    }

    # expected `expires` value: message creation time + 8 days
    # (NOTE: this source uses 8 days, unlike most others here)
    MESSAGE_EXPIRES = str(parse_iso_datetime_to_utc(ParserTestMixIn.message_created) +
                          datetime.timedelta(days=8))

    def cases(self):
        # one domain per input line (with trailing whitespace to strip)
        yield (
            'example-1.com \n'
            'example-2.com \n'
            'example-3.com \n'
            ,
            [
                dict(
                    self.get_bl_items(1, 3),
                    fqdn='example-1.com',
                    time=self.message_created,
                    expires=self.MESSAGE_EXPIRES,
                ),
                dict(
                    self.get_bl_items(2, 3),
                    fqdn='example-2.com',
                    time=self.message_created,
                    expires=self.MESSAGE_EXPIRES,
                ),
                dict(
                    self.get_bl_items(3, 3),
                    fqdn='example-3.com',
                    time=self.message_created,
                    expires=self.MESSAGE_EXPIRES,
                ),
            ]
        )
def parse(self, data):
    """
    Yield one record dict per relevant CSV row of the feed.

    Rows whose second column is 'C2' become 'cnc' events; rows marked
    'Distribution Site' or 'Payment Site' become 'malurl'; all other
    rows (and empty ones) are skipped.
    """
    reader = csv.reader(StringIO(data['raw']), delimiter=',', quotechar='"')
    for row in reader:
        if not row:
            continue
        with self.new_record_dict(data) as parsed:
            threat_kind = row[1]
            if threat_kind == 'C2':
                parsed['category'] = 'cnc'
            elif threat_kind in ('Distribution Site', 'Payment Site'):
                parsed['category'] = 'malurl'
            else:
                continue
            parsed['time'] = parse_iso_datetime_to_utc(row[0])
            parsed['name'] = row[2]
            parsed['fqdn'] = row[3]
            ips = row[7]
            if ips:
                # de-duplicate the '|'-separated IP list
                parsed['address'] = [{'ip': ip} for ip in set(ips.split('|'))]
            if row[4]:
                parsed['url'] = row[4]
            yield parsed
AbuseChSSLBlacklistDyreParser, AbuseChSSLBlacklistParser, AbuseChSSLBlacklist201902Parser, AbuseChUrlhausUrlsParser, AbuseChUrlhausUrls202001Parser, AbuseChUrlhausPayloadsUrlsParser, ) from n6.parsers.generic import ( BaseParser, BlackListTabDataParser, ) from n6.tests.parsers._parser_test_mixin import ParserTestMixIn from n6lib.datetime_helpers import parse_iso_datetime_to_utc MESSAGE_EXPIRES = str(parse_iso_datetime_to_utc(ParserTestMixIn.message_created) + datetime.timedelta(days=2)) # # Common cases def _cases_for_doms201406(self): """Cases for tests of AbuseCh...DomsParser classes.""" yield ('www.example.su\n' '# ignore comments and blank lines...\n' ' \n' '\n' 'example.RU\n' ' site.EXAMPLE.org\n' 'example.NET \n'
def process_row_fields(self, data, parsed, ip, name, *fields):
    """Fill `parsed` with name, IP, time and expiry (time + delta)."""
    parsed['name'] = name
    parsed['address'] = {'ip': ip}
    parsed['time'] = data['properties.timestamp']
    expiry = parse_iso_datetime_to_utc(parsed['time']) + self._time_delta
    parsed['expires'] = expiry
def _get_expires(time, days=2):
    """
    Compute the expiry timestamp for a blacklist entry.

    Args:
        `time`: the event time as an ISO-8601 date+time string.
        `days` (default: 2): the entry's validity period in days
            (previously hard-coded; parameterized for sources with
            a different validity period, backward-compatibly).

    Returns:
        A `datetime.datetime`: `time` parsed to UTC plus `days` days.

    Raises:
        ValueError: if `time` cannot be parsed.
    """
    return parse_iso_datetime_to_utc(time) + datetime.timedelta(days=days)
class TestMyMailBLParser(ParserTestMixIn, unittest.TestCase):

    """
    The template for testing e-mail sourced blacklist parsers.

    The `MESSAGE_EXTRA_HEADERS` attribute should contain a meta header
    `mail_time`, which indicates the time, the message was created.
    """

    RECORD_DICT_CLASS = BLRecordDict
    PARSER_SOURCE = 'my.mail-bl'
    PARSER_CLASS = MyMailBLParser
    PARSER_BASE_CLASS = BlackListTabDataParser
    PARSER_CONSTANT_ITEMS = {
        'restriction': '',
        'confidence': '',
        #'category': '',
        #'_do_not_resolve_fqdn_to_ip': True,
    }
    # `mail_time` should be set for the mail sources
    MESSAGE_EXTRA_HEADERS = {'meta': {'mail_time': '2017-01-12 12:02:04'}}
    # expected `expires` value: message creation time + 2 days
    message_expires = (
        parse_iso_datetime_to_utc(ParserTestMixIn.message_created) +
        datetime.timedelta(days=2))

    def cases(self):
        # Events from this set of data will have their
        # `_bl-current-time` attribute fetched from the
        # `mail_time` AMQP meta header.
        yield ('01\t1.2.3.4\n'
               '02\t1.2.3.4,5.6.7.8\n',
               [
                   dict(
                       self.get_bl_items(1, 2),
                       address={'ip': '1.2.3.4'},
                       time=self.message_created,
                       expires=self.message_expires,
                   ),
                   dict(
                       self.get_bl_items(2, 2),
                       address=[{
                           'ip': '1.2.3.4'
                       }, {
                           'ip': '5.6.7.8'
                       }],
                       time=self.message_created,
                       expires=self.message_expires,
                   ),
               ])
        yield (
            # Events from this set of data can have their
            # `_bl-current-time` attribute's value taken
            # from the e-mail's content. For these events,
            # the `bl_current_time` keyword argument passed
            # into `get_bl_items()` method should be set accordingly.
            '# This is an example e-mail blacklist source,\n'
            '# Mail generated at: 2017-01-20 12:02:03.\n'
            '01\t1.2.3.4\n'
            '02\t1.2.3.4,5.6.7.8\n',
            [
                dict(
                    self.get_bl_items(1, 2, bl_current_time="2017-01-20 12:02:03"),
                    address={'ip': '1.2.3.4'},
                    time=self.message_created,
                    expires=self.message_expires,
                ),
                dict(
                    self.get_bl_items(2, 2, bl_current_time="2017-01-20 12:02:03"),
                    address=[{
                        'ip': '1.2.3.4'
                    }, {
                        'ip': '5.6.7.8'
                    }],
                    time=self.message_created,
                    expires=self.message_expires,
                ),
            ])
class TestMyBLParser(ParserTestMixIn, unittest.TestCase):

    """
    The template for testing blacklist parsers.

    If the tested parser receives messages with a meta header
    'http_last_modified' (set by a collector, if the downloaded data
    has an HTTP 'Last-Modified' header), then the
    `MESSAGE_EXTRA_HEADERS` should have this header set accordingly.
    """

    RECORD_DICT_CLASS = BLRecordDict
    PARSER_SOURCE = 'my.bl'
    PARSER_CLASS = MyBLParser
    PARSER_BASE_CLASS = BlackListTabDataParser
    PARSER_CONSTANT_ITEMS = {
        'restriction': '',
        'confidence': '',
        #'category': '',
        #'_do_not_resolve_fqdn_to_ip': True,
    }
    # `http_last_modified` should be set, if the source is provided
    # with the HTTP `Last-Modified` header
    MESSAGE_EXTRA_HEADERS = {
        'meta': {
            'http_last_modified': '2017-01-12 12:02:04'
        }
    }
    # expected `expires` value: message creation time + 2 days
    message_expires = (
        parse_iso_datetime_to_utc(ParserTestMixIn.message_created) +
        datetime.timedelta(days=2))

    def cases(self):
        # Events from this set of data will have their
        # `_bl-current-time` attribute fetched from the
        # `http_last_modified` AMQP meta header.
        yield ('01\t1.2.3.4\n'
               '02\t1.2.3.4,5.6.7.8\n',
               [
                   dict(
                       self.get_bl_items(1, 2),
                       address={'ip': '1.2.3.4'},
                       time=self.message_created,
                       expires=self.message_expires,
                   ),
                   dict(
                       self.get_bl_items(2, 2),
                       address=[{
                           'ip': '1.2.3.4'
                       }, {
                           'ip': '5.6.7.8'
                       }],
                       time=self.message_created,
                       expires=self.message_expires,
                   ),
               ])
        yield (
            # Events from this set of data can have their
            # `_bl-current-time` attribute's value taken
            # from the data. For these events, the `bl_current_time`
            # keyword argument passed into `get_bl_items()` method
            # should be set accordingly.
            '# This is an example blacklist source,\n'
            '# Feed generated at: 2017-01-20 12:02:03.\n'
            '01\t1.2.3.4\n'
            '02\t1.2.3.4,5.6.7.8\n',
            [
                dict(
                    self.get_bl_items(1, 2, bl_current_time="2017-01-20 12:02:03"),
                    address={'ip': '1.2.3.4'},
                    time=self.message_created,
                    expires=self.message_expires,
                ),
                dict(
                    self.get_bl_items(2, 2, bl_current_time="2017-01-20 12:02:03"),
                    address=[{
                        'ip': '1.2.3.4'
                    }, {
                        'ip': '5.6.7.8'
                    }],
                    time=self.message_created,
                    expires=self.message_expires,
                ),
            ])
def __init__(self, payload):
    """
    Initialize high-frequency aggregation state from an event `payload`.

    `first` and `until` both start at the event's (parsed) time and
    the occurrence counter starts at 1.
    """
    self.group = payload.get("_group")
    # parse once instead of twice (the original called
    # parse_iso_datetime_to_utc() on the same value for both fields);
    # datetime objects are immutable, so sharing the instance is safe
    event_time = parse_iso_datetime_to_utc(payload.get('time'))
    self.until = event_time
    self.first = event_time
    self.count = 1  # XXX: see ticket #6243
    self.payload = payload
def normalize_row_time(self, raw_row_time):
    """Return `raw_row_time` parsed to a UTC datetime, rendered as `str`."""
    utc_dt = parse_iso_datetime_to_utc(raw_row_time)
    return str(utc_dt)
def __init__(self, **kwargs):
    """
    Pop `id`, `client` (optional) and `time` (required) from **kwargs;
    `time` is parsed to a UTC datetime.
    """
    for optional_attr in ("id", "client"):
        setattr(self, optional_attr, kwargs.pop(optional_attr, None))
    self.time = parse_iso_datetime_to_utc(kwargs.pop("time"))