Пример #1
0
    def _dataset(self):
        """Merge the most recent CSV logs into bracketed, quoted text rows.

        Reads the last ``config.TRENDLINE_PERIOD`` ``*.csv`` files (in sorted
        name order) from ``LOG_DIRECTORY``. Rows sharing the same
        ``(proto, dst_port, dst_ip, src_ip)`` are merged into one entry that
        keeps the smallest ``first_seen``, the largest ``last_seen`` and the
        summed ``count``. If ``self._get_filters()`` returns a non-empty
        value, only rows whose ``src_ip`` or ``dst_ip`` (converted with
        ``addr_to_int``) is contained in it are considered.

        Returns:
            A string that starts with a newline followed by one
            ``["proto","dst_port","dst_ip","src_ip","first_seen","last_seen","count",],``
            line per merged entry, in the order the entries were first met.
            ``*_seen`` values are rendered with ``TIME_FORMAT`` (UTC) and the
            port gets a `` (name)`` suffix when a service name is known.
        """
        rows = []
        indexes = {}  # merge key -> position of the merged entry in `rows`

        filters = self._get_filters()

        for filename in sorted(glob.glob(os.path.join(LOG_DIRECTORY, "*.csv")))[-config.TRENDLINE_PERIOD:]:
            # Skip files whose name lacks a "<digits/dashes>.csv" part without opening them
            if not re.search(r"([\d-]+)\.csv", filename):
                continue

            # NOTE(review): binary mode is what the Python 2 csv module expects;
            # Python 3's csv needs a text-mode file - revisit when porting.
            with open(filename, "rb") as f:
                for row in csv.DictReader(f, delimiter=' '):
                    if filters and not (addr_to_int(row["src_ip"]) in filters or addr_to_int(row["dst_ip"]) in filters):
                        continue

                    key = (row["proto"], row["dst_port"], row["dst_ip"], row["src_ip"])
                    index = indexes.get(key)

                    if index is None:
                        indexes[key] = len(rows)
                        rows.append(row)
                    else:
                        merged = rows[index]
                        merged["first_seen"] = min(int(merged["first_seen"]), int(row["first_seen"]))
                        merged["last_seen"] = max(int(merged["last_seen"]), int(row["last_seen"]))
                        merged["count"] = int(merged["count"]) + int(row["count"])

        # Collect the pieces and join once instead of growing a string in a loop
        chunks = ["\n"]

        for row in rows:
            # Best-effort service name lookup: any failure simply means "no name"
            try:
                port = int(row['dst_port'])
                port_name = MISC_PORTS.get(port) or socket.getservbyport(port, row['proto'].lower())
            except Exception:
                port_name = None

            # NOTE(review): values are interpolated without escaping - confirm
            # that logged fields can never contain a double quote.
            chunks.append("[")
            for column in ("proto", "dst_port", "dst_ip", "src_ip", "first_seen", "last_seen", "count"):
                if "_seen" in column:
                    chunks.append('"%s",' % datetime.datetime.utcfromtimestamp(int(row[column])).strftime(TIME_FORMAT))
                elif "_port" in column and port_name:
                    chunks.append('"%s (%s)",' % (row[column], port_name))
                else:
                    chunks.append('"%s",' % row[column])
            chunks.append("],\n")

        return "".join(chunks)
Пример #2
0
    def _trendline_data(self):
        """Count log rows per protocol/port series and day as chart rows.

        Reads the last ``config.TRENDLINE_PERIOD`` ``*.csv`` files (in sorted
        name order) from ``LOG_DIRECTORY``; the ``<digits/dashes>`` part in
        front of ``.csv`` in the file name is used as the date of every row
        in that file. Each row is counted under a series label built as
        ``PROTO[ port][ (service name)]``. If ``self._get_filters()``
        returns a non-empty value, only rows whose ``src_ip`` or ``dst_ip``
        (converted with ``addr_to_int``) is contained in it are counted.

        Without filters a series is left out when some day other than the
        latest one has fewer than ``config.TRENDLINE_DAILY_THRESHOLD`` rows
        and every day has fewer than ``config.TRENDLINE_DAILY_BURST`` rows.

        Returns:
            ``"\\n"`` when nothing was counted. Otherwise a newline followed
            by a ``['Date','<series>',...],`` header line and one
            ``[new Date(y,m,d),<count>,...],`` line per date (ascending),
            with series ordered by their count on the latest date
            (descending) and no trailing newline.
        """
        series = {}  # series label -> {date string: number of rows}
        dates = set()

        filters = self._get_filters()

        for filename in sorted(glob.glob(os.path.join(
                LOG_DIRECTORY, "*.csv")))[-config.TRENDLINE_PERIOD:]:
            # Decide from the name alone, before the file gets opened
            match = re.search(r"([\d-]+)\.csv", filename)

            if not match:
                continue

            date = match.group(1)

            # NOTE(review): binary mode is what the Python 2 csv module
            # expects; Python 3's csv needs a text-mode file.
            with open(filename, "rb") as f:
                for row in csv.DictReader(f, delimiter=' '):
                    if filters and not (addr_to_int(row["src_ip"]) in filters
                                        or addr_to_int(
                                            row["dst_ip"]) in filters):
                        continue

                    # Best-effort service name lookup: failure means no name
                    try:
                        port = int(row['dst_port'])
                        port_name = MISC_PORTS.get(
                            port) or socket.getservbyport(
                                port, row['proto'].lower())
                    except Exception:
                        port_name = None

                    port_part = ""
                    if row['dst_port'].isdigit():
                        port_part = " %s" % row['dst_port']

                    name_part = " (%s)" % port_name if port_name else ""
                    serie = "%s%s%s" % (
                        row['proto'].upper(), port_part, name_part)

                    counts = series.setdefault(serie, {})
                    counts[date] = counts.get(date, 0) + 1
                    dates.add(date)

        if not series:
            return "\n"

        last_date = max(dates)

        # Build the list of kept series directly; this neither relies on
        # dict.keys() returning a mutable list nor does index()/del per drop.
        keys = []
        for key in series:
            counts = series[key]
            if not filters:
                if any(counts.get(_, 0) < config.TRENDLINE_DAILY_THRESHOLD
                       for _ in dates if _ != last_date) and all(
                           counts.get(_, 0) < config.TRENDLINE_DAILY_BURST
                           for _ in dates):
                    continue
            keys.append(key)

        keys = sorted(
            keys, key=lambda key: series[key].get(last_date, 0), reverse=True)

        lines = ["['Date',%s]," % ','.join("'%s'" % key for key in keys)]

        for date in sorted(dates):
            year, month, day = date.split('-')
            cells = "".join("%s," % series[key].get(date, 0) for key in keys)
            # Month is emitted zero-based, matching JavaScript's Date()
            lines.append("[new Date(%s,%d,%s),%s]," % (
                year, int(month) - 1, day, cells))

        return "\n" + "\n".join(lines)
Пример #3
0
    def _dataset(self):
        """Merge the most recent CSV logs into bracketed, quoted text rows.

        Reads the last ``config.TRENDLINE_PERIOD`` ``*.csv`` files (in sorted
        name order) from ``LOG_DIRECTORY``. Rows sharing the same
        ``(proto, dst_port, dst_ip, src_ip)`` are merged into one entry that
        keeps the smallest ``first_seen``, the largest ``last_seen`` and the
        summed ``count``. If ``self._get_filters()`` returns a non-empty
        value, only rows whose ``src_ip`` or ``dst_ip`` (converted with
        ``addr_to_int``) is contained in it are considered.

        Returns:
            A string that starts with a newline followed by one line per
            merged entry, in the order the entries were first met. Each line
            is ``[`` + the seven columns (proto, dst_port, dst_ip, src_ip,
            first_seen, last_seen, count) as ``"value",`` + ``],``.
            ``*_seen`` values are rendered with ``TIME_FORMAT`` (UTC) and the
            port gets a `` (name)`` suffix when a service name is known.
        """
        rows = []
        indexes = {}  # merge key -> position of the merged entry in `rows`

        filters = self._get_filters()

        for filename in sorted(glob.glob(os.path.join(
                LOG_DIRECTORY, "*.csv")))[-config.TRENDLINE_PERIOD:]:
            # Skip files whose name lacks a "<digits/dashes>.csv" part
            # without opening them
            if not re.search(r"([\d-]+)\.csv", filename):
                continue

            # NOTE(review): binary mode is what the Python 2 csv module
            # expects; Python 3's csv needs a text-mode file.
            with open(filename, "rb") as f:
                for row in csv.DictReader(f, delimiter=' '):
                    if filters and not (addr_to_int(row["src_ip"]) in filters
                                        or addr_to_int(
                                            row["dst_ip"]) in filters):
                        continue

                    key = (row["proto"], row["dst_port"], row["dst_ip"],
                           row["src_ip"])
                    index = indexes.get(key)

                    if index is None:
                        indexes[key] = len(rows)
                        rows.append(row)
                    else:
                        merged = rows[index]
                        merged["first_seen"] = min(
                            int(merged["first_seen"]),
                            int(row["first_seen"]))
                        merged["last_seen"] = max(
                            int(merged["last_seen"]),
                            int(row["last_seen"]))
                        merged["count"] = int(merged["count"]) + int(
                            row["count"])

        # Collect the pieces and join once instead of growing a string
        chunks = ["\n"]

        for row in rows:
            # Best-effort service name lookup: failure simply means no name
            try:
                port = int(row['dst_port'])
                port_name = MISC_PORTS.get(port) or socket.getservbyport(
                    port, row['proto'].lower())
            except Exception:
                port_name = None

            # NOTE(review): values are interpolated without escaping -
            # confirm that logged fields can never contain a double quote.
            chunks.append("[")
            for column in ("proto", "dst_port", "dst_ip", "src_ip",
                           "first_seen", "last_seen", "count"):
                if "_seen" in column:
                    chunks.append(
                        '"%s",' % datetime.datetime.utcfromtimestamp(
                            int(row[column])).strftime(TIME_FORMAT))
                elif "_port" in column and port_name:
                    chunks.append('"%s (%s)",' % (row[column], port_name))
                else:
                    chunks.append('"%s",' % row[column])
            chunks.append("],\n")

        return "".join(chunks)
Пример #4
0
    def _trendline_data(self):
        """Count log rows per protocol/port series and day as chart rows.

        Reads the last ``config.TRENDLINE_PERIOD`` ``*.csv`` files (in sorted
        name order) from ``LOG_DIRECTORY``; the ``<digits/dashes>`` part in
        front of ``.csv`` in the file name is used as the date of every row
        in that file. Each row is counted under a series label built as
        ``PROTO[ port][ (service name)]``. If ``self._get_filters()``
        returns a non-empty value, only rows whose ``src_ip`` or ``dst_ip``
        (converted with ``addr_to_int``) is contained in it are counted.

        Without filters a series is left out when some day other than the
        latest one has fewer than ``config.TRENDLINE_DAILY_THRESHOLD`` rows
        and every day has fewer than ``config.TRENDLINE_DAILY_BURST`` rows.

        Returns:
            ``"\\n"`` when nothing was counted. Otherwise a newline followed
            by a ``['Date','<series>',...],`` header line and one
            ``[new Date(y,m,d),<count>,...],`` line per date (ascending),
            with series ordered by their count on the latest date
            (descending) and no trailing newline.
        """
        series = {}  # series label -> {date string: number of rows}
        dates = set()

        filters = self._get_filters()

        for filename in sorted(glob.glob(os.path.join(LOG_DIRECTORY, "*.csv")))[-config.TRENDLINE_PERIOD:]:
            # Decide from the name alone, before the file gets opened
            match = re.search(r"([\d-]+)\.csv", filename)

            if not match:
                continue

            date = match.group(1)

            # NOTE(review): binary mode is what the Python 2 csv module expects;
            # Python 3's csv needs a text-mode file - revisit when porting.
            with open(filename, "rb") as f:
                for row in csv.DictReader(f, delimiter=' '):
                    if filters and not (addr_to_int(row["src_ip"]) in filters or addr_to_int(row["dst_ip"]) in filters):
                        continue

                    # Best-effort service name lookup: any failure simply means "no name"
                    try:
                        port = int(row['dst_port'])
                        port_name = MISC_PORTS.get(port) or socket.getservbyport(port, row['proto'].lower())
                    except Exception:
                        port_name = None

                    port_part = " %s" % row['dst_port'] if row['dst_port'].isdigit() else ""
                    name_part = " (%s)" % port_name if port_name else ""
                    serie = "%s%s%s" % (row['proto'].upper(), port_part, name_part)

                    counts = series.setdefault(serie, {})
                    counts[date] = counts.get(date, 0) + 1
                    dates.add(date)

        if not series:
            return "\n"

        last_date = max(dates)

        def _is_noise(counts):
            # Some earlier day is below the threshold and no day reaches the burst level
            return any(counts.get(_, 0) < config.TRENDLINE_DAILY_THRESHOLD for _ in dates if _ != last_date) and all(counts.get(_, 0) < config.TRENDLINE_DAILY_BURST for _ in dates)

        # Build the kept list directly; this neither relies on dict.keys() returning
        # a mutable list nor performs an index()/del scan for every dropped series.
        keys = [key for key in series if filters or not _is_noise(series[key])]
        keys = sorted(keys, key=lambda key: series[key].get(last_date, 0), reverse=True)

        lines = ["['Date',%s]," % ','.join("'%s'" % key for key in keys)]

        for date in sorted(dates):
            year, month, day = date.split('-')
            cells = "".join("%s," % series[key].get(date, 0) for key in keys)
            # Month is emitted zero-based, matching JavaScript's Date() constructor
            lines.append("[new Date(%s,%d,%s),%s]," % (year, int(month) - 1, day, cells))

        return "\n" + "\n".join(lines)