Example No. 1
def process_results(results, producer, output_topic, window_duration):
    """
    Format and report detected records.

    :param results: Detected records
    :param producer: Kafka producer that sends the data to output topic
    :param output_topic: Name of the receiving kafka topic
    :param window_duration: Duration of the window (in seconds)
    """
    output_json = ""
    # Transform the given results into JSON
    for key, value in results.iteritems():
        if key in detectionsDict:
            # Report again only if the attack was last reported more than
            # one window ago; accumulate the newly detected flows.
            if (detectionsDict[key][0] + window_duration * 1000) <= value[0]:
                detectionsDict[key] = (value[0],
                                       detectionsDict[key][1] + value[1])
                output_json += get_output_json(key, value,
                                               detectionsDict[key][1])
        else:
            detectionsDict[key] = (value[0], value[1])
            output_json += get_output_json(key, value, value[1])

    if output_json:
        # Print data to standard output
        cprint(output_json)

        # Check if dictionary cleaning is necessary
        clean_old_data_from_dictionary(window_duration)

        # Send results to the specified kafka topic
        kafkaIO.send_data_to_kafka(output_json, producer, output_topic)
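The helpers get_output_json and clean_old_data_from_dictionary, as well as the module-level detectionsDict, are defined elsewhere in the source module. A minimal sketch of the cleaning helper, assuming detectionsDict maps each key to a (last_report_ms, flow_count) tuple and that entries unreported for several windows can be dropped:

import time

detectionsDict = {}  # module-level store: key -> (last_report_ms, flow_count)

def clean_old_data_from_dictionary(window_duration, max_age_windows=10):
    # Hypothetical cleaner: drop entries that were last reported more than
    # max_age_windows windows ago so the dictionary does not grow unbounded.
    threshold_ms = (time.time() - max_age_windows * window_duration) * 1000
    for key in list(detectionsDict.keys()):
        if detectionsDict[key][0] < threshold_ms:
            del detectionsDict[key]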
Example No. 2
def process_results(data_to_process, producer, output_topic):
    """
    Process analyzed data and modify it into desired output.

    JSON format:
    {
        "@type": "protocols_statistics",
        "protocol" : <protocol>,
        "flows": <#flows>,
        "packets": <#packets>,
        "bytes": <#bytes>
    }

    :param data_to_process: analyzed data
    :param producer: Kafka producer
    :param output_topic: Kafka topic through which output is sent
    """

    # Transform the given results into JSON
    results_output = ""
    for key, value in data_to_process.iteritems():
        results_output += ('{"@type": "protocols_statistics", "protocol": "%s", '
                           '"flows": %s, "packets": %s, "bytes": %s}\n'
                           % (key, value[0], value[1], value[2]))

    # Check if there are any results
    if results_output:
        # Print results to standard output
        cprint(results_output)

        # Send desired output to the output_topic
        kafkaIO.send_data_to_kafka(results_output, producer, output_topic)
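The manual string concatenation above is easy to break with quoting. An equivalent record builder using json.dumps (the helper name protocol_record is hypothetical; OrderedDict keeps the key order shown in the docstring):

import json
from collections import OrderedDict

def protocol_record(protocol, flows, packets, bytes_count):
    # Build one output line per protocol; json.dumps handles all escaping.
    record = OrderedDict([("@type", "protocols_statistics"),
                          ("protocol", protocol),
                          ("flows", flows),
                          ("packets", packets),
                          ("bytes", bytes_count)])
    return json.dumps(record) + "\n"

# Usage inside the loop above:
#   results_output += protocol_record(key, value[0], value[1], value[2])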
Example No. 3
def process_results(results, producer, topic, window_duration):
    """
    Check whether an attack was already reported, or whether additional flows were detected for the same attack, and report it.

    :param results: flows that should be reported as attack
    :param producer: producer that sends the data
    :param topic: name of the receiving kafka topic
    :param window_duration: window size (in seconds)
    """
    output_json = ""
    # Transform the given results into JSON
    for key, value in results.iteritems():
        if key in attDict:
            # Report again only if the attack was last reported more than
            # one window ago; accumulate the newly detected flows.
            if (attDict[key][1] + window_duration * 1000) <= value[3]:
                attDict[key] = (attDict[key][0] + value[0], value[3])
                output_json += get_output_json(key, value, attDict[key][0])
        else:
            attDict[key] = (value[0], value[3])
            output_json += get_output_json(key, value, value[0])

    # Check if there are any results
    if output_json:
        # Print results to standard output
        cprint(output_json)

        # Check if dictionary cleaning is necessary
        clean_old_data_from_dictionary(window_duration)

        # Send results to the specified kafka topic
        kafkaIO.send_data_to_kafka(output_json, producer, topic)
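get_output_json is referenced in several of these examples but not shown. A minimal sketch of such a formatter, assuming the third argument is the accumulated flow count and value[3] carries the detection timestamp as used in this example (all field names are illustrative):

import json

def get_output_json(key, value, flow_count):
    # Hypothetical formatter: one JSON object per reported attack.
    return json.dumps({"@type": "detection",
                       "key": str(key),
                       "flows": flow_count,
                       "timestamp": value[3]}) + "\n"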
Example No. 4
def process_results(results, producer, output_topic):
    """
    Format and report computed statistics.

    :param results: Computed statistics
    :param producer: Producer that sends the data
    :param output_topic: Name of the receiving kafka topic
    """
    # Dictionary to store all data for given statistic
    statistics = {}
    for key, value in results.iteritems():
        # Get statistic name (last element of the key)
        statistic_type = key[-1]
        # Create an empty list if the statistic type is not yet present
        if statistic_type not in statistics:
            statistics[statistic_type] = []
        # Build the data record for this statistic
        if statistic_type == "queried_by_ip":
            data = {"key": key[0], "value": value, "ip": key[1]}
        elif (statistic_type == "queried_domain") and (value == 1):
            # Skip queried domains with only one occurrence
            continue
        else:
            data = {"key": key[0], "value": value}
        # Append data to statistics dictionary
        statistics[statistic_type].append(data)

    # Create all statistics JSONs in string format
    output_json = ""
    for statistic_type, data in statistics.iteritems():
        # Check if Top 100 data elements should be selected to reduce volume of data in database
        if statistic_type in [
                "queried_domain", "nonexisting_domain", "queried_by_ip"
        ]:
            data.sort(key=lambda stat: stat['value'], reverse=True)
            data_array = json.dumps(data[:100])
        else:
            data_array = json.dumps(data)

        output_json += "{\"@type\": \"dns_statistics\", \"@stat_type\": \"" + statistic_type + "\", " + \
                       "\"data_array\": " + data_array + "}\n"

    if output_json:
        # Print data to standard output
        print(output_json)

        # Send results to the specified kafka topic
        kafkaIO.send_data_to_kafka(output_json, producer, output_topic)
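For the Top 100 selection, heapq.nlargest avoids sorting the whole list; a behavior-equivalent alternative (ordering among equal values may differ):

import heapq
import json

# Select the Top 100 records in O(n log 100) instead of O(n log n)
top_100 = heapq.nlargest(100, data, key=lambda stat: stat['value'])
data_array = json.dumps(top_100)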
Example No. 5
def process_results(data_to_process, producer, output_topic):
    """
    Process analyzed data and modify it into desired output.

    :param data_to_process: analyzed data
    :param producer: Kafka producer
    :param output_topic: Kafka topic through which output is sent
    """

    # Here you can format your results output and send it to the kafka topic
    # <-- INSERT YOUR CODE HERE

    # Example transformation: dump each dictionary value as a JSON string, one per line
    results_output = '\n'.join(map(json.dumps, data_to_process.values()))

    # Send desired output to the output_topic
    kafkaIO.send_data_to_kafka(results_output, producer, output_topic)
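For illustration, with hypothetical input the transformation above emits one JSON object per line:

# Illustrative input (hypothetical data):
data_to_process = {
    "192.168.1.1": {"@type": "example", "flows": 10},
    "192.168.1.2": {"@type": "example", "flows": 3},
}
# results_output then holds two newline-separated JSON objects:
#   {"@type": "example", "flows": 10}
#   {"@type": "example", "flows": 3}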
Example No. 6
def process_results(results, producer, output_topic):
    """
    Format and report detected records.

    :param results: Detected records
    :param producer: Kafka producer that sends the data to output topic
    :param output_topic: Name of the receiving kafka topic
    """
    output_json = ""
    # Transform the given results into JSON
    for key, value in results.iteritems():
        output_json += get_output_json(key, value)

    if output_json:
        # Print data to standard output
        cprint(output_json)

        # Send results to the specified kafka topic
        kafkaIO.send_data_to_kafka(output_json, producer, output_topic)
Example No. 7
def process_results(data_to_process, producer, output_topic):
    """
    Process analyzed data and modify it into desired output.

    :param data_to_process: analyzed data
    :param producer: Kafka producer
    :param output_topic: Kafka topic through which output is sent
    """
    # Get half the size of the default distribution array
    intervals = configuration['distance']['distribution']['default']['intervals']
    mid = len(intervals) // 2

    # Here you can format your results output and send it to the kafka topic
    output_jsons = ""
    for result in data_to_process.values():
        # Check whether the result should be reported; get distribution sums if so
        report, distributions_sum = check_if_report(result, configuration)
        if not report:
            continue

        output_json = {}
        output_json['@type'] = 'pattern_finder'
        output_json['configuration'] = configuration['configuration']['name']
        output_json.update(result['output'])
        output_json['data_array'] = []
        highest_distribution_sum = 0
        closest_patterns = []

        for name, distribution in distributions_sum.items():
            output_json['data_array'].append({
                'name': name,
                'distribution': distribution
            })

            left = sum(distribution[:mid])
            right = sum(distribution[mid:])
            distribution_cfg = configuration['distance']['distribution']
            pattern_cfg = distribution_cfg.get(name) or distribution_cfg['default']
            limit = (pattern_cfg.get('limit')
                     or distribution_cfg['default'].get('limit'))

            # Keep the pattern only if the left-side sum reaches the limit
            # and outweighs the right side
            if left >= limit and left > right:
                if left == highest_distribution_sum:
                    closest_patterns.append(name)
                    highest_distribution_sum = left
                elif left > highest_distribution_sum:
                    closest_patterns = [name]
                    highest_distribution_sum = left

        output_json['closest_patterns'] = closest_patterns
        output_jsons += json.dumps(output_json) + '\n'

    # Check if there are any results
    if output_jsons:
        # Print current time in same format as Spark
        cprint('-------------------------------------------')
        cprint('Time: ' + datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        cprint('-------------------------------------------')

        # Print results to standard output
        cprint(output_jsons)

        # Send desired output to the output_topic
        kafkaIO.send_data_to_kafka(output_jsons, producer, output_topic)
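A worked example of the left/right comparison above, with illustrative numbers:

# With mid = 3 and limit = 10 (illustrative values):
distribution = [5, 4, 3, 1, 0, 0]
left = sum(distribution[:3])     # 12
right = sum(distribution[3:])    # 1
# left >= limit and left > right, so this pattern would qualify as a
# closest-pattern candidate.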
Example No. 8
def process_results(data_to_process, producer, output_topic, count_top_n):
    """
    Process analyzed data and modify it into desired output.

    :param data_to_process: analyzed data in the format (src IP, IPStats([PortStats], [DstIPStats], [HTTPHostStats]))
    :param producer: Kafka producer
    :param output_topic: Kafka topic through which output is sent
    :param count_top_n: integer of N in TopN

    JSON format:
        {"src_ip": "<host src IPv4 address>",
         "@type": "top_n_host_stats",
         "stats": {
             "top_n_dst_ports": {
                 "0": {"port": <port #1>, "flows": <# of flows>},
                 ...
                 "n": {"port": <port #n>, "flows": <# of flows>}
             },
             "top_n_dst_hosts": {
                 "0": {"dst_host": <dst_host #1>, "flows": <# of flows>},
                 ...
                 "n": {"dst_host": <dst_host #n>, "flows": <# of flows>}
             },
             "top_n_http_dst": {
                 "0": {"dst_host": <dst_host #1>, "flows": <# of flows>},
                 ...
                 "n": {"dst_host": <dst_host #n>, "flows": <# of flows>}
             }
         }
        }
    """

    results_output = ""
    for ip, ip_stats in data_to_process.iteritems():
        # Define output keys for particular stats in X_Stats named tuples
        port_data_dict = {
            "top_n_dst_ports": ip_stats.ports,
            "top_n_dst_hosts": ip_stats.dst_ips,
            "top_n_http_dst": ip_stats.http_hosts
        }

        # Take top n entries from IP's particular stats sorted by flows param
        port_data_dict = {
            key: _sort_by_flows(val_list)[:count_top_n]
            for (key, val_list) in port_data_dict.iteritems()
        }
        # parse the stats from StatsItem to a desirable form
        port_data_dict = {
            key: _parse_stats_items_list(val_list)
            for (key, val_list) in port_data_dict.iteritems()
        }

        # Construct the output object in predefined format
        result_dict = {
            "@type": "top_n_host_stats",
            "src_ip": ip,
            "stats": port_data_dict
        }

        # Accumulate one JSON object per line for all processed IPs
        results_output += json.dumps(result_dict) + "\n"

    # Logging terminal output
    print("%s: Stats of %s IPs parsed and sent" %
          (time.strftime("%c"), len(data_to_process.keys())))

    # Send desired output to the output_topic
    kafkaIO.send_data_to_kafka(results_output, producer, output_topic)
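The helpers _sort_by_flows and _parse_stats_items_list are defined elsewhere. A minimal sketch, assuming StatsItem is a namedtuple with a flows field:

from operator import attrgetter

def _sort_by_flows(stats_items):
    # Sort StatsItem entries by flow count, highest first
    return sorted(stats_items, key=attrgetter('flows'), reverse=True)

def _parse_stats_items_list(stats_items):
    # Produce the {"0": {...}, ..., "n": {...}} structure from the docstring;
    # _asdict() works for namedtuple-based StatsItem
    return {str(i): item._asdict() for i, item in enumerate(stats_items)}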
Example No. 9
def process_results(data_to_process, producer, output_topic):
    """
    Process analyzed data and modify it into desired output.

    :param data_to_process: analyzed data
    :param producer: Kafka producer
    :param output_topic: Kafka topic through which output is sent
    """
    result_os = {
        "@type": "tls_classification",
        "@stat_type": "os",
        "data_array": []
    }
    result_browser = {
        "@type": "tls_classification",
        "@stat_type": "browser",
        "data_array": []
    }
    result_application = {
        "@type": "tls_classification",
        "@stat_type": "application",
        "data_array": []
    }

    # Map computed statistics to the matching result record
    results_by_type = {
        "os": result_os,
        "browser": result_browser,
        "application": result_application
    }
    for key, value in data_to_process.iteritems():
        type_classified = key.split(";")
        if type_classified[0] in results_by_type:
            results_by_type[type_classified[0]]["data_array"].append({
                "key": type_classified[1],
                "value": value
            })

    # Add counts for Unknown keys
    for result in result_os, result_browser, result_application:
        unknown_value_present = False
        for result_data in result["data_array"]:
            if result_data["key"] == "Unknown" or result_data[
                    "key"] == "Unknown:Unknown":
                unknown_value_present = True
                result_data["value"] += data_to_process["count"]
        if not unknown_value_present:
            if result["@stat_type"] == "application":
                result["data_array"].append({
                    "key": "Unknown:Unknown",
                    "value": data_to_process["count"]
                })
            else:
                result["data_array"].append({
                    "key": "Unknown",
                    "value": data_to_process["count"]
                })

    # Concatenate all results into one output
    output_json = "\n".join(json.dumps(result) for result in
                            (result_os, result_browser, result_application)) + "\n"

    print(output_json)

    # Send desired output to the output_topic
    kafkaIO.send_data_to_kafka(output_json, producer, output_topic)
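For illustration, the expected input keys are "<category>;<value>" strings plus a total "count" entry that is added to the Unknown keys (hypothetical data):

data_to_process = {
    "os;Windows": 120,
    "browser;Firefox": 80,
    "application;Firefox:Windows": 75,
    "count": 15,  # flows that could not be classified
}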
Example No. 10
def process_results(data_to_process, producer, output_topic):
    """
    Transform given computation results into the JSON format and send them to the specified kafka instance.

    JSON format:
    {"src_ip": "<host src IPv4 address>",
     "@type": "host_stats",
     "stats": {
        "total": {"packets": <# of packets>, "bytes": <# of bytes>, "flow": <# of flows>},
        "avg_flow_duration": <avg. duration of flows>,
        "dport_count": <number of distinct destination ports>,
        "peer_number": <number of distinct communication peers>,
        "tcp_flags": {"FIN": <number of FIN flags>, "SYN": <number of SYN flags>, ...}
        }
    }

    :param data_to_process: Map in the following format:
        (src IP, (('total_stats', <# of flows>, <# of packets>, <# of bytes>),
                  ('peer_number', <# of peers>),
                  ('dport_count', <# of distinct ports>),
                  ('avg_flow_duration', <average flow duration>),
                  ('tcp_flags', <bitarray of TCP flags>)))
    :param producer: Initialized Kafka producer
    :param output_topic: Name of the output topic for Kafka
    """

    results = ""

    for ip, data in data_to_process.iteritems():

        total_dict = {}
        stats_dict = {"total": total_dict}
        result_dict = {
            "@type": "host_stats",
            "src_ip": ip,
            "stats": stats_dict
        }

        # Process total stats

        total_dict["flow"] = data[statistics_position["total_stats"]][
            total_stats_position["total_flows"]]
        total_dict["packets"] = data[statistics_position["total_stats"]][
            total_stats_position["total_packets"]]
        total_dict["bytes"] = data[statistics_position["total_stats"]][
            total_stats_position["total_bytes"]]
        # TODO: total_stats_position is not defined in this module; define it
        # here or pass it in as an argument.

        # Process peer_number stats
        stats_dict["peer_number"] = data[statistics_position["peer_number"]][
            peer_number_position["peer_number"]]

        # Process dport_number stats
        stats_dict["dport_count"] = data[statistics_position["dport_count"]][
            dport_count_position["dport_number"]]

        # Process average flow duration stats
        stats_dict["avg_flow_duration"] = data[
            statistics_position["average_flow_duration"]][
                avg_flow_duration_postion["avg_duration"]]

        # Process TCP flag sums (only if flag statistics exist for this host)
        if data[statistics_position["tcp_flags"]]:
            stats_dict["tcp_flags"] = map_tcp_flags(
                data[statistics_position["tcp_flags"]][
                    tcp_flags_position["tcp_flags_array"]])

        results += json.dumps(result_dict) + "\n"
    # test print
    # print(results)

    # Send desired output to the output_topic
    kafkaIO.send_data_to_kafka(results, producer, output_topic)
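The index maps used above are defined elsewhere (and, per the TODO, total_stats_position may be missing). A sketch consistent with the tuple layout in the docstring:

# Hypothetical index maps matching the docstring's tuple layout:
statistics_position = {"total_stats": 0, "peer_number": 1, "dport_count": 2,
                       "average_flow_duration": 3, "tcp_flags": 4}
total_stats_position = {"total_flows": 1, "total_packets": 2, "total_bytes": 3}
peer_number_position = {"peer_number": 1}
dport_count_position = {"dport_number": 1}
avg_flow_duration_postion = {"avg_duration": 1}  # name as referenced above
tcp_flags_position = {"tcp_flags_array": 1}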