def process(self):
    """Parse a whitespace-separated feed of infected hosts redirecting to malicious hosts.

    For every data row two events are sent: one for the infected
    (redirecting) host and one for the compromised host it points at.
    """
    report = self.receive_message()
    if report is None or not report.contains("raw"):
        self.acknowledge_message()
        return
    raw_report = utils.base64_decode(report.get("raw"))
    source_time = None
    for row in raw_report.splitlines():
        if row.startswith("# last updated:"):
            # The feed header carries the generation time; reuse it as time.source.
            source_time = dateutil.parser.parse(row[row.find(':') + 1:],
                                                tzinfos=self.TZOFFSETS)
            source_time = source_time.isoformat()
            continue
        if row.startswith('#'):
            continue
        row_split = row.split()
        # BUG FIX: blank or truncated lines previously raised IndexError on
        # row_split[0]; four fields are read below, so require at least four.
        if len(row_split) < 4:
            continue

        event_infected = Event(report)
        event_infected.add('time.source', source_time)
        event_infected.add('classification.type', 'malware')
        # '/' marks a missing IP column in this feed
        if row_split[0] != '/':
            event_infected.add('source.ip', row_split[0])
        event_infected.add('source.fqdn', row_split[1])
        event_infected.add('source.url', row_split[2])
        event_infected.add('destination.fqdn', row_split[3])
        event_infected.add('event_description.text',
                           'has malicious code redirecting to malicious '
                           'host')
        event_infected.add('raw', row)
        self.send_message(event_infected)

        event_compromised = Event(report)
        event_compromised.add('time.source', source_time)
        event_compromised.add('classification.type', 'compromised')
        if row_split[0] != '/':
            event_compromised.add('destination.ip', row_split[0])
        event_compromised.add('destination.fqdn', row_split[1])
        event_compromised.add('destination.url', row_split[2])
        event_compromised.add('source.fqdn', row_split[3])
        event_compromised.add('event_description.text',
                              'host has been compromised and has '
                              'malicious code infecting users')
        event_compromised.add('raw', row)
        self.send_message(event_compromised)
    self.acknowledge_message()
def process(self):
    """
    The Ranswomware Tracker has comments in it.

    The IP address field can also have more than one address.
    The ASN and Country code are being ignored, an expert parser can get
    those added.
    """
    report = self.receive_message()
    raw_report = utils.base64_decode(report.get("raw"))
    for row in csv.reader(io.StringIO(raw_report)):
        if row[0].startswith('#'):
            continue
        # Column 7 may hold several addresses separated by '|'; splitting a
        # value without '|' yields the value itself, so a single loop handles
        # both cases.
        # BUG FIX: the old multi-address branch re-serialized the row through
        # hand-built quoting (broken for embedded quotes) and added source.ip
        # without the IPAddress.is_valid check used in the single-address
        # branch. Substituting into the parsed list fixes both.
        for ipaddr in row[7].split('|'):
            fixed = row[:7] + [ipaddr] + row[8:]
            event = Event(report)
            event.add('classification.identifier', fixed[2].lower())
            event.add('classification.type', 'c&c')
            event.add('time.source', fixed[0] + ' UTC', force=True)
            event.add('status', fixed[5])
            if IPAddress.is_valid(fixed[7]):
                event.add('source.ip', fixed[7])
            event.add('raw', ','.join(fixed))
            if FQDN.is_valid(fixed[3]):
                event.add('source.fqdn', fixed[3])
            if URL.is_valid(fixed[4]):
                event.add('source.url', fixed[4])
            self.send_message(event)
    self.acknowledge_message()
def process(self):
    """Parse an HTML table feed: each <tr> carries an IP, a type cell and a timestamp cell."""
    report = self.receive_message()
    raw_report = utils.base64_decode(report.get("raw"))
    for row in raw_report.split('<tr>'):
        # Get IP and Type
        # (raw strings: '\d' in a plain string is an invalid escape sequence)
        info1 = re.search(
            r">[\ ]*(\d+\.\d+\.\d+\.\d+)[\ ]*<.*</td><td>([^<]+)</td>", row)
        if not info1:
            continue
        # Get Timestamp
        info2 = re.search(
            r"<td>[\ ]*(\d{4}-\d{2}-\d{2}\ \d{2}:\d{2}:\d{2})[\ ]*</td>", row)
        # BUG FIX: a row with an IP but no timestamp previously raised
        # AttributeError on info2.group(1); skip such malformed rows.
        if not info2:
            continue
        event = Event(report)
        description = info1.group(2)
        description = utils.decode(description)
        event_type = self.get_type(description)
        # Feed timestamps are local time, UTC-8.
        time_source = info2.group(1) + " UTC-8"
        event.add("time.source", time_source)
        event.add("source.ip", info1.group(1))
        event.add('classification.type', event_type)
        event.add('event_description.text', description)
        event.add("raw", row)
        self.send_message(event)
    self.acknowledge_message()
def process(self):
    """Parse a CSV scanner feed: column 0 is the IP, column 2 a free-text description."""
    report = self.receive_message()
    if report is None or not report.contains("raw"):
        self.acknowledge_message()
        return
    field_map = [
        "source.ip",
        "__IGNORE__",
        "event_description.text",
        "__IGNORE__",
    ]
    decoded = utils.base64_decode(report.get("raw"))
    seen_header = False
    for record in utils.csv_reader(decoded):
        # the first row carries the column headers
        if not seen_header:
            seen_header = True
            continue
        event = Event(report)
        for field, cell in zip(field_map, record):
            if field != "__IGNORE__":
                event.add(field, cell)
        event.add('classification.type', u'scanner')
        event.add("raw", ",".join(record))
        self.send_message(event)
    self.acknowledge_message()
def parse_line(self, line, report):
    """Parse one HTML table row (<td> cells: IP, last-seen, description) into an event.

    Blank or malformed lines yield nothing.
    """
    event = Event(report)
    info = line.split("<td>")
    # BUG FIX: info[3] is read below, so at least four parts are required;
    # the original `len(info) < 3` guard still allowed an IndexError.
    if len(line) <= 0 or len(info) < 4:
        return
    ip = info[1].split('</td>')[0].strip()
    # feed times are UTC-5
    last_seen = info[2].split('</td>')[0].strip() + '-05:00'
    description = self.parser.unescape(info[3].split('</td>')[0].strip())
    # Map the free-text description onto a known classification: first try
    # the allowed values directly, then the TAXONOMY synonyms.
    for key in ClassificationType.allowed_values:
        if description.lower().find(key.lower()) > -1:
            event.add("classification.type", key)
            break
    else:
        for key, value in TAXONOMY.items():
            if description.lower().find(key.lower()) > -1:
                event.add("classification.type", value)
                break
    if not event.contains("classification.type"):
        event.add("classification.type", 'unknown')
    event.add("time.source", last_seen)
    event.add("source.ip", ip)
    event.add("event_description.text", description)
    event.add("raw", line + "</tr>")
    yield event
def process(self):
    """Parse a CSV feed whose headers are mapped to event fields via self.parameters.columns."""
    report = self.receive_message()
    columns = self.parameters.columns
    if not report or not report.contains("raw"):
        self.acknowledge_message()
        return
    # Cleanup: the original wrapped everything below in `if report:`, which
    # is always true after the early return above.
    raw_report = utils.base64_decode(report.get("raw"))
    rows = csv.DictReader(StringIO(raw_report))
    for row in rows:
        event = Event(report)
        for key, value in row.items():
            # translate the CSV header into a harmonization field name
            key = columns[key]
            if not value:
                continue
            value = value.strip()
            # placeholder columns are not mapped
            if key == u'__IGNORE__' or key == u'__TBD__':
                continue
            event.add(key, value, sanitize=True)
        event.add('classification.type', u'vulnerable service')
        self.send_message(event)
    self.acknowledge_message()
def process(self):
    """Parse the <tbody> of an HTML table feed; each <tr> becomes one malware event."""
    report = self.receive_message()
    decoded = utils.base64_decode(report.get("raw"))
    # keep only the content between <tbody> and </tbody>
    body = decoded.split("<tbody>")[1].split("</tbody>")[0]
    for chunk in body.split("<tr>"):
        chunk = chunk.strip()
        if not chunk:
            continue
        cells = chunk.split("<td>")
        # the IP sits inside a link in the second cell
        ip = cells[1].split('">')[1].split("<")[0].strip()
        # the last cell holds a date only; pin it to midnight UTC
        day = cells[6].replace("</td></tr>", "").strip()
        event = Event(report)
        event.add('time.source', day + " 00:00:00 UTC")
        event.add('classification.type', 'malware')
        event.add('source.ip', ip)
        event.add('raw', chunk)
        self.send_message(event)
    self.acknowledge_message()
def process(self):
    """Parse a plain-text blacklist whose first line holds the feed date in parentheses."""
    report = self.receive_message()
    if report is None or not report.contains("raw"):
        self.acknowledge_message()
        return
    raw_report = utils.base64_decode(report.get("raw")).strip()
    # The feed timestamp is enclosed in parentheses on the first line.
    header = raw_report.splitlines()[0]
    stamp = header[header.find('(') + 1:header.find(')')]
    feed_time = dateutil.parser.parse(stamp).isoformat()
    for row in raw_report.split('\n'):
        entry = row.strip()
        if not entry or entry.startswith('#') or entry.startswith('//'):
            continue
        event = Event(report)
        # single addresses become source.ip, anything else (ranges) source.network
        if IPAddress.is_valid(entry):
            event.add('source.ip', entry)
        else:
            event.add('source.network', entry)
        event.add('time.source', feed_time)
        event.add('classification.type', u'blacklist')
        event.add('raw', row)
        self.send_message(event)
    self.acknowledge_message()
def process(self):
    """Parse a plain list of C&C domains; the malware name is looked up from the feed URL."""
    report = self.receive_message()
    # BUG FIX: the original acknowledged a report lacking "raw" but did not
    # return, so utils.base64_decode was then called on None and crashed.
    if not report or not report.contains("raw"):
        self.acknowledge_message()
        return
    raw_report = utils.base64_decode(report.get("raw"))
    for row in raw_report.split('\n'):
        row = row.strip()
        if row.startswith("#") or len(row) == 0:
            continue
        event = Event(report)
        event.add('classification.type', u'c&c')
        event.add('source.fqdn', row)
        event.add("raw", row)
        event.add("malware.name", SOURCE_FEEDS[report.get("feed.url")])
        self.send_message(event)
    self.acknowledge_message()
def process(self): report = self.receive_message() if not report or not report.contains("raw"): self.acknowledge_message() return raw_report = utils.base64_decode(report.get("raw")) for row in utils.csv_reader(raw_report): event = Event(report) self.logger.debug(repr(row)) event.add("time.source", row[0].replace('_', ' ') + " UTC") if row[1] != '-': event.add("source.url", self.add_http(row[1])) try: event.add("source.ip", row[2]) except InvalidValue: event.add("source.url", self.add_http(row[2])) event.add('source.ip', urlparse(row[2]).netloc) event.add("source.reverse_dns", row[3]) event.add("event_description.text", row[4]) # TODO: ignore abuse contact for now event.add("source.asn", int(row[6])) event.add('classification.type', u'malware') event.add("raw", ",".join(row)) self.send_message(event) self.acknowledge_message()
def test_intelmq_exchange(self):
    """
    Test if correct Events have been produced, sent from a TCP Output of
    another IntelMQ instance. We spawn independent process of the TCPOutput
    bot that sends a bunch of messages.
    """
    bot = TestTCPOutputBot()
    bot.setUpClass()
    bot.input_message = []
    msg_count = 100
    # queue msg_count identical events for the output bot to push over TCP
    for i in range(msg_count):
        bot.input_message.append(
            Event(INPUT1, harmonization=self.harmonization))
    # the sender runs in its own process so it can connect to the bot under test
    (Process(target=bot._delayed_start)).start()
    self.run_bot()
    self.assertOutputQueueLen(msg_count)
    for i, msg in enumerate(self.get_output_queue()):
        report = MessageFactory.unserialize(
            msg, harmonization=self.harmonization, default_type='Event')
        # the received report wraps the original event base64-encoded in "raw"
        output = MessageFactory.unserialize(
            utils.base64_decode(report["raw"]),
            harmonization=self.harmonization, default_type='Event')
        self.assertDictEqual(output, INPUT1)
        # time.observation and raw vary per run; remove before comparing
        del report['time.observation']
        del report['raw']
        self.assertDictEqual(report, REPORT1)
def process(self):
    """Parse a CSV phishing feed whose headers are translated via the COLUMNS mapping."""
    report = self.receive_message()
    raw_report = utils.base64_decode(report.get("raw"))
    for row in csv.DictReader(io.StringIO(raw_report)):
        event = Event(report)
        for key, value in row.items():
            if not value:
                continue
            if key is None:
                self.logger.warning('Value without key found, skipping the'
                                    ' value: {!r}'.format(value))
                continue
            key = COLUMNS[key]
            # NOTE(review): "__TDB__" looks like a typo of "__TBD__" used by
            # sibling parsers — confirm against the COLUMNS mapping before
            # changing it.
            if key == "__IGNORE__" or key == "__TDB__":
                continue
            # some sources put bare IP addresses into the domain column; skip those
            if key == "source.fqdn" and IPAddress.is_valid(value, sanitize=True):
                continue
            if key == "time.source":
                value = value + " UTC"
            event.add(key, value)
        event.add('classification.type', 'phishing')
        # BUG FIX: joining the DictReader row directly concatenated the
        # column *names*; 'raw' must carry the row's values.
        event.add("raw", ",".join(row.values()))
        self.send_message(event)
    self.acknowledge_message()
def process(self):
    """Parse a CSV malware feed (legacy variant using report.value and sanitize=True)."""
    report = self.receive_message()
    if not report or not report.contains("raw"):
        self.acknowledge_message()
        return
    decoded = utils.base64_decode(report.value("raw"))
    for record in csv.reader(StringIO(decoded)):
        event = Event(report)
        # the feed separates date and time with '_'
        event.add("time.source", record[0].replace('_', ' ') + " UTC",
                  sanitize=True)
        if record[1] != '-':
            event.add("source.url", record[1], sanitize=True)
        try:
            event.add("source.ip", record[2], sanitize=True)
        except InvalidValue:
            # column 2 sometimes carries a URL rather than a bare address
            event.add("source.url", record[2], sanitize=True)
            event.add('source.ip', urlparse(record[2]).netloc, sanitize=True)
        event.add("source.reverse_dns", record[3], sanitize=True)
        event.add("event_description.text", record[4], sanitize=True)
        # TODO: ignore abuse contact for now
        event.add("source.asn", int(record[6]))
        event.add('classification.type', u'malware')
        event.add("raw", ",".join(record), sanitize=True)
        self.send_message(event)
    self.acknowledge_message()
def process(self):
    """Parse a tab-separated feed; the first date-like field from index 4 on becomes time.source."""
    report = self.receive_message()
    if report is None or not report.contains("raw"):
        self.acknowledge_message()
        return
    raw_report = utils.base64_decode(report.get("raw"))
    for row in raw_report.split('\n'):
        row = row.rstrip()
        if row.startswith("#") or len(row) == 0:
            continue
        # the first tab-separated field is discarded
        values = row.split('\t')[1:]
        # BUG FIX: truncated lines previously raised IndexError on
        # values[1]/values[2]; skip them instead.
        if len(values) < 3:
            continue
        event = Event(report)
        event.add('source.fqdn', values[1])
        event.add('event_description.text', values[2])
        for i in range(4, len(values)):
            if is_valid_date(values[i]):
                event.add('time.source',  # TODO: verify timezone
                          values[i] + "T00:00:00+00:00", force=True)
                break
        event.add('classification.type', u'malware')
        event.add('raw', row)
        self.send_message(event)
    self.acknowledge_message()
def process(self):
    """Parse a brute-force feed, extracting the IP and timestamp from each line via regexes."""
    report = self.receive_message()
    raw_report = utils.base64_decode(report.get("raw"))
    for row in raw_report.splitlines():
        if row.startswith('#'):
            continue
        event = Event(report)
        # BUG FIX: the original assigned `ip` only inside the match branch
        # but added it unconditionally, so a row without an address reused
        # the previous row's IP (or raised NameError on the first row).
        # Fields are now added only when actually found in the row.
        ip_match = re.search(REGEX_IP, row)
        if ip_match:
            event.add('source.ip', ip_match.group())
        ts_match = re.search(REGEX_TIMESTAMP, row)
        if ts_match:
            event.add('time.source', ts_match.group(1) + " UTC")
        event.add('classification.type', 'brute-force')
        event.add("raw", row)
        self.send_message(event)
    self.acknowledge_message()
def process(self):
    """Parse a PhishTank-style CSV feed into phishing events."""
    report = self.receive_message()
    if report is None or not report.contains("raw"):
        self.acknowledge_message()
        return
    field_map = [
        "__IGNORE__",
        "source.url",
        "event_description.url",
        "time.source",
        "__IGNORE__",
        "__IGNORE__",
        "__IGNORE__",
        "event_description.target",
    ]
    decoded = utils.base64_decode(report.get("raw"))
    for record in csv.reader(io.StringIO(decoded)):
        # the header row contains the literal column name "phish_id"
        if "phish_id" in record:
            continue
        event = Event(report)
        for field, cell in zip(field_map, record):
            if field != "__IGNORE__":
                event.add(field, cell)
        event.add('classification.type', u'phishing')
        event.add("raw", ",".join(record))
        self.send_message(event)
    self.acknowledge_message()
def process(self):
    """Parse a plain list of IPs and domains into malware events."""
    report = self.receive_message()
    # BUG FIX: the original acknowledged a report lacking "raw" but fell
    # through, so base64_decode was then called on None and crashed.
    if not report or not report.contains("raw"):
        self.acknowledge_message()
        return
    raw_report = utils.base64_decode(report.value("raw"))
    for row in raw_report.split('\n'):
        row = row.strip()
        if row == "" or row.startswith("#"):
            continue
        event = Event(report)
        # addresses become source.ip, everything else source.fqdn
        if IPAddress.is_valid(row, sanitize=True):
            event.add('source.ip', row, sanitize=True)
        else:
            event.add('source.fqdn', row, sanitize=True)
        event.add('classification.type', u'malware')
        event.add('raw', row, sanitize=True)
        self.send_message(event)
    self.acknowledge_message()
def parse_line(self, line, report):
    """Parse one '|'-separated line of the SSH brute-force feed.

    Comment lines are collected into self.tempdata; data lines yield one event.
    """
    if line.startswith('#'):
        self.tempdata.append(line)
    else:
        splitted_row = line.split('|')
        event = Event(report)
        columns = [
            "source.asn",
            "source.as_name",
            "source.ip",
            "time.source",
        ]
        for key, value in zip(columns, splitted_row):
            value = value.strip()
            # BUG FIX: the "NA" placeholder must be tested before the
            # timezone suffix is appended; previously "NA" in time.source
            # became "NA+00:00" and was never skipped.
            if value == "NA":
                continue
            if key == "time.source":
                # feed timestamps are UTC
                value += "+00:00"
            event.add(key, value)
        event.add("classification.type", "brute-force")
        event.add("protocol.application", "ssh")
        event.add("protocol.transport", "tcp")
        event.add("destination.port", 22)
        event.add("raw", line)
        yield event
def parse_line(self, line, report):
    """Parse one '|'-separated line; the trailing field names the sub-feed."""
    if line.startswith('#') or len(line) == 0:
        self.tempdata.append(line)
    else:
        event = Event(report)
        fields = line.split('|')
        feed_name = fields[-1].strip()
        # fall back to the generic column layout for unknown sub-feeds
        fmt = FILE_FORMATS.get(feed_name) or FILE_FORMATS['_default']
        # the layout describes every field except the trailing feed name
        if len(fields) != len(fmt) + 1:
            raise ValueError(f'Incorrect format for feed {event.get("feed.url")}, found line: "{line}"')
        if feed_name not in CATEGORY:
            raise ValueError(f'Unknown data feed {feed_name}.')
        event.update(CATEGORY[feed_name])
        for raw_value, (field_name, converter) in zip(fields, fmt):
            converted = converter(raw_value.strip())
            if converted is not None:
                event.add(field_name, converted)
        event.add('raw', line)
        yield event
def process(self):
    """Parse a plain list of URLs into malware events."""
    report = self.receive_message()
    if report is None or not report.contains("raw"):
        self.acknowledge_message()
        return
    decoded = utils.base64_decode(report.get("raw"))
    for line in decoded.split('\n'):
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        event = Event(report)
        # only the first whitespace-separated token of each line is mapped
        for field, token in zip(["source.url"], line.split()):
            event.add(field, token)
        event.add('classification.type', u'malware')
        event.add("raw", line)
        self.send_message(event)
    self.acknowledge_message()
def process(self):
    """Parse a C&C domain list whose header comment carries a 'Generated on' timestamp."""
    report = self.receive_message()
    decoded = utils.base64_decode(report.get("raw"))
    lastgenerated = None
    for line in decoded.splitlines():
        event = Event(report)
        line = line.strip()
        if not line:
            continue
        if line.startswith("#"):
            if 'Generated on' in line:
                # remember the feed generation time for the data lines below
                stamp = line.strip('# ')[13:]
                lastgenerated = dateutil.parser.parse(stamp).isoformat()
            continue
        event.add('time.source', lastgenerated)
        event.add('classification.type', 'c&c')
        event.add('source.fqdn', line)
        event.add("raw", line)
        event.add("malware.name", SOURCE_FEEDS[report.get("feed.url")])
        self.send_message(event)
    self.acknowledge_message()
def process(self):
    """Parse a brute-force feed, extracting IP and timestamp per line via regexes."""
    report = self.receive_message()
    if report is None or not report.contains("raw"):
        self.acknowledge_message()
        return
    raw_report = utils.base64_decode(report.value("raw"))
    for row in raw_report.split('\n'):
        if row.startswith('#'):
            continue
        event = Event(report)
        # BUG FIX: `ip` was assigned only inside the match branch yet added
        # unconditionally, so a row without an address reused the previous
        # row's IP (or raised NameError on the first row). Add only when found.
        ip_match = re.search(REGEX_IP, row)
        if ip_match:
            event.add('source.ip', ip_match.group(), sanitize=True)
        ts_match = re.search(REGEX_TIMESTAMP, row)
        if ts_match:
            event.add('time.source', ts_match.group(1) + " UTC", sanitize=True)
        event.add('classification.type', u'brute-force')
        event.add("raw", row, sanitize=True)
        self.send_message(event)
    self.acknowledge_message()
def test_multiple_bots(self):
    """
    Let's simulate multiple IntelMQ instances are pushing the events at once!
    Every message must be queued.

    Note that if too much clients want connect, connections are refused.
    """
    client_count = 5
    msg_count = 300
    # spawn client_count independent TCPOutput bots, each sending msg_count events
    for _ in range(client_count):
        bot = TestTCPOutputBot()
        bot.setUpClass()
        # bot.bot_id = "test-client-{}".format(_)
        bot.input_message = []
        for i in range(msg_count):
            bot.input_message.append(Event(INPUT1,
                                           harmonization=self.harmonization))
        Process(target=bot._delayed_start).start()
    # one extra, non-IntelMQ client connects concurrently
    thread = threading.Thread(target=Client().random_client)
    thread.start()
    self.input_message = None
    # can't use standard .bot_run(iteration) or .start() because shutdown() would be called
    # and we need to handle multiple connections
    self.prepare_bot()
    self.bot._Bot__source_pipeline = self.pipe
    self.bot._Bot__destination_pipeline = self.pipe
    for _ in range(client_count + 1):
        # every single calling of process() method will serve to a single connection
        with mock.patch('intelmq.lib.utils.load_configuration',
                        new=self.mocked_config):
            with mock.patch('intelmq.lib.utils.log', self.mocked_log):
                self.bot.process()
    self.bot.stop()  # let's call shutdown() and free up bound address
    # NOTE(review): the +2 presumably accounts for messages sent by the
    # random_client thread — confirm against Client().random_client.
    self.assertOutputQueueLen(client_count * msg_count + 2)
def process(self):
    """Parse an open-SNMP CSV feed (ShadowServer-style columns) into vulnerable-service events."""
    report = self.receive_message()
    raw_report = utils.base64_decode(report["raw"])
    # BUG FIX: csv.DictReader does not accept a `dictreader` keyword — the
    # original call raised TypeError on every run.
    for row in csv.DictReader(io.StringIO(raw_report)):
        event = Event(report)
        extra = {}
        event.add('time.source', row['timestamp'] + ' UTC')
        event.add('source.ip', row['ip'])
        event.add('protocol.transport', row['protocol'])
        event.add('source.port', row['port'])
        event.add('source.reverse_dns', row['hostname'])
        # free-form device information goes into the extra dict
        extra['sysdesc'] = row['sysdesc']
        extra['sysname'] = row['sysname']
        event.add('source.asn', row['asn'])
        event.add('source.geolocation.cc', row['geo'])
        event.add('source.geolocation.region', row['region'])
        event.add('source.geolocation.city', row['city'])
        # optional numeric/sector columns are recorded only when non-zero/non-empty
        if int(row['naics']):
            extra['naics'] = int(row['naics'])
        if int(row['sic']):
            extra['sic'] = int(row['sic'])
        if row['sector']:
            extra['sector'] = row['sector']
        event.add('extra', extra)
        event.add('protocol.application', 'snmp')
        event.add('classification.type', 'vulnerable service')
        event.add('classification.identifier', 'snmp')
        event.add('raw', '"' + ','.join(map(str, row.items())) + '"')
        self.send_message(event)
    self.acknowledge_message()
def parse_line(self, row, report):
    """Parse one URL line into a malware event; non-http lines are skipped."""
    if not row.startswith('http'):
        return
    url_object = urlparse(row)
    url = url_object.geturl()
    hostname = url_object.hostname
    # BUG FIX: urlparse always returns a (truthy) ParseResult, so the old
    # `if not url_object` check was dead code. What can genuinely be missing
    # is the hostname, which previously flowed on as None.
    if not hostname:
        return
    port = url_object.port
    event = Event(report)
    if IPAddress.is_valid(hostname):
        event.add("source.ip", hostname)
    else:
        event.add("source.fqdn", hostname)
    event.add('classification.type', 'malware')
    event.add("source.url", url)
    if port:
        event.add("source.port", port)
    event.add("raw", row)
    self.send_message(event)
def process(self):
    """Parse a feed of 'IP unix-timestamp' lines into blacklist events."""
    report = self.receive_message()
    decoded = utils.base64_decode(report.get("raw"))
    fields = ["source.ip", "time.source"]
    for line in decoded.splitlines():
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        event = Event(report)
        for field, token in zip(fields, line.split()):
            if field == "time.source":
                # the feed gives seconds since the epoch (UTC)
                token = datetime.utcfromtimestamp(
                    int(token)).strftime('%Y-%m-%d %H:%M:%S') + " UTC"
            event.add(field, token.strip())
        event.add('classification.type', 'blacklist')
        event.add("raw", line)
        self.send_message(event)
    self.acknowledge_message()
def process(self):
    """Parse a plain IP list; the classification is chosen by the feed's file name."""
    report = self.receive_message()
    if report is None or not report.contains("raw"):
        self.acknowledge_message()
        return
    raw_report = utils.base64_decode(report.get("raw"))
    raw_report = raw_report.strip()
    # the file name of the feed URL selects the MAPPING entry
    url = report.get('feed.url')
    path = urlparse(url).path
    filename = posixpath.basename(path)
    for row in raw_report.split('\n'):
        # BUG FIX: blank lines previously produced events with an empty
        # source.ip; skip them.
        if not row.strip():
            continue
        event = Event(report)
        event.add('source.ip', row.strip())
        if filename in MAPPING:
            for key, value in MAPPING[filename].items():
                event.add(key, value)
        else:
            event.add('classification.type', 'blacklist')
        event.add("raw", row)
        self.send_message(event)
    self.acknowledge_message()
def process(self):
    """Parse a plain list of IP addresses into malware events."""
    report = self.receive_message()
    # BUG FIX: the original acknowledged a report lacking "raw" but did not
    # return, so base64_decode was then called on None and crashed.
    if not report or not report.contains("raw"):
        self.acknowledge_message()
        return
    raw_report = utils.base64_decode(report.get("raw"))
    for row in raw_report.split('\n'):
        row = row.strip()
        if row == "" or row.startswith("#"):
            continue
        event = Event(report)
        event.add('source.ip', row)
        event.add('classification.type', u'malware')
        event.add('raw', row)
        self.send_message(event)
    self.acknowledge_message()
def process(self):
    """Parse a domain list whose header comment contains an 'updated' timestamp."""
    report = self.receive_message()
    if report is None or not report.contains("raw"):
        self.acknowledge_message()
        return
    raw_report = utils.base64_decode(report.get("raw"))
    # BUG FIX: initialise the feed time so a data row appearing before the
    # 'updated' header line cannot raise NameError.
    time = None
    for row in raw_report.split('\n'):
        if row.startswith("#") or len(row) == 0 or row == "Site":
            if 'updated' in row:
                time_str = row[row.find(': ') + 2:]
                time = dateutil.parser.parse(time_str).isoformat()
            continue
        event = Event(report)
        event.add('classification.type', u'malware')
        event.add('source.fqdn', row.strip())
        event.add('time.source', time)
        event.add("raw", row)
        self.send_message(event)
    self.acknowledge_message()
def parse_line(self, row, report):
    """Parse one '#'-separated line; one event is yielded per classification type."""
    values = row.split("#")
    # Send one event per classification
    classification_types = list()
    if values[3].strip().find(";") > 0:
        classification_types.extend(values[3].split(";"))
    else:
        classification_types.append(values[3])
    # BUG FIX: the coordinates were assigned only when present yet added
    # unconditionally below, raising NameError for rows without them.
    # Parse once outside the loop and add only when both values exist.
    geo_latitude = geo_longitude = None
    if len(values[6].strip()) > 0:
        geo_coordinates = values[6].strip().split(",")
        if len(geo_coordinates) == 2:
            geo_latitude, geo_longitude = geo_coordinates
    for ctype in classification_types:
        event = Event(report)
        if ctype.lower() in CLASSIFICATION:
            event.add('classification.type', CLASSIFICATION[ctype.lower()])
        else:
            event.add('classification.type', "unknown")
        event.add('source.ip', values[0].strip())
        event.add('source.geolocation.cc', values[4].strip())
        event.add('source.geolocation.city', values[5].strip())
        if geo_latitude is not None:
            event.add('source.geolocation.latitude', geo_latitude.strip())
            event.add('source.geolocation.longitude', geo_longitude.strip())
        event.add("raw", row)
        yield event