def _dataset(self):
    """Render the merged log rows as JavaScript array literals.

    Takes the last ``config.TRENDLINE_PERIOD`` ``*.csv`` files (by sorted
    file name) found in ``LOG_DIRECTORY``, merges rows that share the same
    ``(proto, dst_port, dst_ip, src_ip)`` tuple and renders every merged row
    as one ``["...","...",...],`` line.

    While merging, ``first_seen`` keeps the smallest value, ``last_seen`` the
    largest one and ``count`` the sum over all merged rows.

    If ``self._get_filters()`` returns something truthy, a row is kept only
    when ``addr_to_int`` of its ``src_ip`` or ``dst_ip`` is contained in it.

    Returns:
        str: a leading newline followed by one rendered row per line; only
        the leading newline when no row qualifies.
    """
    columns = ("proto", "dst_port", "dst_ip", "src_ip", "first_seen", "last_seen", "count")
    rows = []
    indexes = {}  # merge key -> position of the merged row inside ``rows``
    filters = self._get_filters()

    filenames = sorted(glob.glob(os.path.join(LOG_DIRECTORY, "*.csv")))
    for filename in filenames[-config.TRENDLINE_PERIOD:]:
        # Skip files whose name has no digits/dashes part before ".csv"
        # (checked before opening so such files are never touched).
        if not re.search(r"([\d-]+)\.csv", filename):
            continue

        # NOTE(review): binary mode is what the Python 2 csv module expects;
        # Python 3 would need text mode with newline="" - confirm the target
        # interpreter before changing it.
        with open(filename, "rb") as f:
            for row in csv.DictReader(f, delimiter=' '):
                key = (row["proto"], row["dst_port"], row["dst_ip"], row["src_ip"])

                if filters and not (addr_to_int(row["src_ip"]) in filters or addr_to_int(row["dst_ip"]) in filters):
                    continue

                if key not in indexes:
                    indexes[key] = len(rows)
                    rows.append(row)
                else:
                    merged = rows[indexes[key]]
                    # Stored values are str for a never-merged row and int
                    # afterwards, hence the int() on both operands.
                    merged["first_seen"] = min(int(merged["first_seen"]), int(row["first_seen"]))
                    merged["last_seen"] = max(int(merged["last_seen"]), int(row["last_seen"]))
                    merged["count"] = int(merged["count"]) + int(row["count"])

    chunks = ["\n"]
    for row in rows:
        try:
            port = int(row['dst_port'])
            port_name = MISC_PORTS.get(port) or socket.getservbyport(port, row['proto'].lower())
        except Exception:
            # Deliberate best effort: a non-numeric or unknown port simply
            # gets no service name.
            port_name = None

        cells = []
        for column in columns:
            if "_seen" in column:
                cells.append('"%s",' % datetime.datetime.utcfromtimestamp(int(row[column])).strftime(TIME_FORMAT))
            elif "_port" in column and port_name:
                cells.append('"%s (%s)",' % (row[column], port_name))
            else:
                cells.append('"%s",' % row[column])

        chunks.append("[%s],\n" % "".join(cells))

    return "".join(chunks)
def _trendline_data(self):
    """Render per-series daily row counts as JavaScript array literals.

    Takes the last ``config.TRENDLINE_PERIOD`` ``*.csv`` files (by sorted
    file name) found in ``LOG_DIRECTORY``. The digits/dashes part of each
    file name is used as the date, and every log row adds one to the counter
    of its series for that date. A series label is the upper-cased protocol,
    followed by the destination port when it is numeric and by the service
    name in parentheses when one could be resolved.

    If ``self._get_filters()`` returns something truthy, a row is counted
    only when ``addr_to_int`` of its ``src_ip`` or ``dst_ip`` is contained in
    it. Without filters, a series is dropped when it is below
    ``config.TRENDLINE_DAILY_THRESHOLD`` on at least one date other than the
    latest one and below ``config.TRENDLINE_DAILY_BURST`` on every date.

    Returns:
        str: a leading newline, a ``['Date',...]`` header line and one
        ``[new Date(...),...]`` line per date, with series ordered by their
        count on the latest date (descending); only the leading newline when
        nothing was counted.
    """
    series = {}  # series label -> {date string: number of rows}
    dates = set()
    filters = self._get_filters()

    filenames = sorted(glob.glob(os.path.join(LOG_DIRECTORY, "*.csv")))
    for filename in filenames[-config.TRENDLINE_PERIOD:]:
        # Checked before opening so non-matching files are never touched.
        match = re.search(r"([\d-]+)\.csv", filename)
        if not match:
            continue
        date = match.group(1)

        # NOTE(review): binary mode is what the Python 2 csv module expects;
        # Python 3 would need text mode with newline="" - confirm the target
        # interpreter before changing it.
        with open(filename, "rb") as f:
            for row in csv.DictReader(f, delimiter=' '):
                if filters and not (addr_to_int(row["src_ip"]) in filters or addr_to_int(row["dst_ip"]) in filters):
                    continue

                try:
                    port = int(row['dst_port'])
                    port_name = MISC_PORTS.get(port) or socket.getservbyport(port, row['proto'].lower())
                except Exception:
                    # Deliberate best effort: a non-numeric or unknown port
                    # simply gets no service name.
                    port_name = None

                serie = "%s%s%s" % (row['proto'].upper(), " %s" % row['dst_port'] if row['dst_port'].isdigit() else "", " (%s)" % port_name if port_name else "")

                counts = series.setdefault(serie, {})
                counts[date] = counts.get(date, 0) + 1
                dates.add(date)

    # A real list (not a dict view) so it can be filtered and sorted on both
    # Python 2 and Python 3.
    keys = list(series)
    if not keys:
        return "\n"

    last_date = max(dates)

    if not filters:
        def _is_negligible(counts):
            # Below the daily threshold on some earlier date and never
            # reaching the burst level on any date.
            if not any(counts.get(_, 0) < config.TRENDLINE_DAILY_THRESHOLD for _ in dates if _ != last_date):
                return False
            return all(counts.get(_, 0) < config.TRENDLINE_DAILY_BURST for _ in dates)

        keys = [key for key in keys if not _is_negligible(series[key])]

    totals = dict((key, series[key].get(last_date, 0)) for key in keys)
    keys.sort(key=totals.__getitem__, reverse=True)

    chunks = ["\n", "['Date',%s],\n" % ','.join("'%s'" % key for key in keys)]
    for date in sorted(dates):
        year, month, day = date.split('-')
        cells = "".join("%s," % series[serie].get(date, 0) for serie in keys)
        # The emitted "new Date(...)" takes a zero-based month, hence the -1.
        chunks.append("[new Date(%s,%d,%s),%s],\n" % (year, int(month) - 1, day, cells))

    # Drop the newline that terminates the last line.
    return "".join(chunks)[:-1]
def _dataset(self):
    """Render the merged log rows as JavaScript array literals.

    Takes the last ``config.TRENDLINE_PERIOD`` ``*.csv`` files (by sorted
    file name) found in ``LOG_DIRECTORY``, merges rows that share the same
    ``(proto, dst_port, dst_ip, src_ip)`` tuple and renders every merged row
    as one ``["...","...",...],`` line.

    While merging, ``first_seen`` keeps the smallest value, ``last_seen`` the
    largest one and ``count`` the sum over all merged rows.

    If ``self._get_filters()`` returns something truthy, a row is kept only
    when ``addr_to_int`` of its ``src_ip`` or ``dst_ip`` is contained in it.

    Returns:
        str: a leading newline followed by one rendered row per line; only
        the leading newline when no row qualifies.
    """
    columns = ("proto", "dst_port", "dst_ip", "src_ip", "first_seen", "last_seen", "count")
    rows = []
    indexes = {}  # merge key -> position of the merged row inside ``rows``
    filters = self._get_filters()

    filenames = sorted(glob.glob(os.path.join(LOG_DIRECTORY, "*.csv")))
    for filename in filenames[-config.TRENDLINE_PERIOD:]:
        # Skip files whose name has no digits/dashes part before ".csv"
        # (checked before opening so such files are never touched).
        if not re.search(r"([\d-]+)\.csv", filename):
            continue

        # NOTE(review): binary mode is what the Python 2 csv module expects;
        # Python 3 would need text mode with newline="" - confirm the target
        # interpreter before changing it.
        with open(filename, "rb") as f:
            for row in csv.DictReader(f, delimiter=' '):
                key = (row["proto"], row["dst_port"], row["dst_ip"], row["src_ip"])

                if filters and not (addr_to_int(row["src_ip"]) in filters or addr_to_int(row["dst_ip"]) in filters):
                    continue

                if key not in indexes:
                    indexes[key] = len(rows)
                    rows.append(row)
                else:
                    merged = rows[indexes[key]]
                    # Stored values are str for a never-merged row and int
                    # afterwards, hence the int() on both operands.
                    merged["first_seen"] = min(int(merged["first_seen"]), int(row["first_seen"]))
                    merged["last_seen"] = max(int(merged["last_seen"]), int(row["last_seen"]))
                    merged["count"] = int(merged["count"]) + int(row["count"])

    chunks = ["\n"]
    for row in rows:
        try:
            port = int(row['dst_port'])
            port_name = MISC_PORTS.get(port) or socket.getservbyport(port, row['proto'].lower())
        except Exception:
            # Deliberate best effort: a non-numeric or unknown port simply
            # gets no service name.
            port_name = None

        cells = []
        for column in columns:
            if "_seen" in column:
                cells.append('"%s",' % datetime.datetime.utcfromtimestamp(int(row[column])).strftime(TIME_FORMAT))
            elif "_port" in column and port_name:
                cells.append('"%s (%s)",' % (row[column], port_name))
            else:
                cells.append('"%s",' % row[column])

        chunks.append("[%s],\n" % "".join(cells))

    return "".join(chunks)
def _trendline_data(self):
    """Render per-series daily row counts as JavaScript array literals.

    Takes the last ``config.TRENDLINE_PERIOD`` ``*.csv`` files (by sorted
    file name) found in ``LOG_DIRECTORY``. The digits/dashes part of each
    file name is used as the date, and every log row adds one to the counter
    of its series for that date. A series label is the upper-cased protocol,
    followed by the destination port when it is numeric and by the service
    name in parentheses when one could be resolved.

    If ``self._get_filters()`` returns something truthy, a row is counted
    only when ``addr_to_int`` of its ``src_ip`` or ``dst_ip`` is contained in
    it. Without filters, a series is dropped when it is below
    ``config.TRENDLINE_DAILY_THRESHOLD`` on at least one date other than the
    latest one and below ``config.TRENDLINE_DAILY_BURST`` on every date.

    Returns:
        str: a leading newline, a ``['Date',...]`` header line and one
        ``[new Date(...),...]`` line per date, with series ordered by their
        count on the latest date (descending); only the leading newline when
        nothing was counted.
    """
    series = {}  # series label -> {date string: number of rows}
    dates = set()
    filters = self._get_filters()

    filenames = sorted(glob.glob(os.path.join(LOG_DIRECTORY, "*.csv")))
    for filename in filenames[-config.TRENDLINE_PERIOD:]:
        # Checked before opening so non-matching files are never touched.
        match = re.search(r"([\d-]+)\.csv", filename)
        if not match:
            continue
        date = match.group(1)

        # NOTE(review): binary mode is what the Python 2 csv module expects;
        # Python 3 would need text mode with newline="" - confirm the target
        # interpreter before changing it.
        with open(filename, "rb") as f:
            for row in csv.DictReader(f, delimiter=' '):
                if filters and not (addr_to_int(row["src_ip"]) in filters or addr_to_int(row["dst_ip"]) in filters):
                    continue

                try:
                    port = int(row['dst_port'])
                    port_name = MISC_PORTS.get(port) or socket.getservbyport(port, row['proto'].lower())
                except Exception:
                    # Deliberate best effort: a non-numeric or unknown port
                    # simply gets no service name.
                    port_name = None

                serie = "%s%s%s" % (row['proto'].upper(), " %s" % row['dst_port'] if row['dst_port'].isdigit() else "", " (%s)" % port_name if port_name else "")

                counts = series.setdefault(serie, {})
                counts[date] = counts.get(date, 0) + 1
                dates.add(date)

    # A real list (not a dict view) so it can be filtered and sorted on both
    # Python 2 and Python 3.
    keys = list(series)
    if not keys:
        return "\n"

    last_date = max(dates)

    if not filters:
        def _is_negligible(counts):
            # Below the daily threshold on some earlier date and never
            # reaching the burst level on any date.
            if not any(counts.get(_, 0) < config.TRENDLINE_DAILY_THRESHOLD for _ in dates if _ != last_date):
                return False
            return all(counts.get(_, 0) < config.TRENDLINE_DAILY_BURST for _ in dates)

        keys = [key for key in keys if not _is_negligible(series[key])]

    totals = dict((key, series[key].get(last_date, 0)) for key in keys)
    keys.sort(key=totals.__getitem__, reverse=True)

    chunks = ["\n", "['Date',%s],\n" % ','.join("'%s'" % key for key in keys)]
    for date in sorted(dates):
        year, month, day = date.split('-')
        cells = "".join("%s," % series[serie].get(date, 0) for serie in keys)
        # The emitted "new Date(...)" takes a zero-based month, hence the -1.
        chunks.append("[new Date(%s,%d,%s),%s],\n" % (year, int(month) - 1, day, cells))

    # Drop the newline that terminates the last line.
    return "".join(chunks)[:-1]