def process_results(results, producer, output_topic, window_duration):
    """
    Format and report detected records.

    :param results: Detected records
    :param producer: Kafka producer that sends the data to the output topic
    :param output_topic: Name of the receiving Kafka topic
    :param window_duration: Duration of the window
    """
    output_json = ""
    # Transform given results into the JSON
    for key, value in results.iteritems():
        if key in detectionsDict:
            # If there are additional flows for the attack that was already reported
            if (detectionsDict[key][0] + window_duration * 1000) <= value[0]:
                detectionsDict[key] = (value[0], detectionsDict[key][1] + value[1])
                output_json += get_output_json(key, value, detectionsDict[key][1])
        else:
            detectionsDict[key] = (value[0], value[1])
            output_json += get_output_json(key, value, value[1])

    if output_json:
        # Print data to standard output
        cprint(output_json)

        # Check if dictionary cleaning is necessary
        clean_old_data_from_dictionary(window_duration)

        # Send results to the specified Kafka topic
        kafkaIO.send_data_to_kafka(output_json, producer, output_topic)
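
# NOTE: clean_old_data_from_dictionary() is called above (and in the attack
# variant below) but is not defined in this section. The following is a minimal
# sketch of what it might look like, assuming detectionsDict maps keys to
# (last timestamp in ms, accumulated count) tuples and that the time module is
# imported as elsewhere in this file; the retention span of 10 windows is an
# illustrative assumption, not taken from the source.
def _clean_old_data_from_dictionary_sketch(window_duration):
    current_time_ms = int(time.time() * 1000)
    retention_ms = 10 * window_duration * 1000  # assumed retention span
    # Drop entries whose last update is older than the retention span
    for key in list(detectionsDict.keys()):
        if detectionsDict[key][0] + retention_ms < current_time_ms:
            del detectionsDict[key]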
def process_results(data_to_process, producer, output_topic):
    """
    Process analyzed data and modify it into desired output.

    JSON format:
    {
        "@type": "protocols_statistics",
        "protocol": <protocol>,
        "flows": <#flows>,
        "packets": <#packets>,
        "bytes": <#bytes>
    }

    :param data_to_process: analyzed data
    :param producer: Kafka producer
    :param output_topic: Kafka topic through which output is sent
    """
    # Transform given results into the JSON
    results_output = ""
    for key, value in data_to_process.iteritems():
        results_output += "{\"@type\": \"protocols_statistics\", \"protocol\": \"" + key + \
                          "\", \"flows\": " + str(value[0]) + ", \"packets\": " + str(value[1]) + \
                          ", \"bytes\": " + str(value[2]) + "}\n"

    # Check if there are any results
    if results_output:
        # Print results to standard output
        cprint(results_output)

        # Send desired output to the output_topic
        kafkaIO.send_data_to_kafka(results_output, producer, output_topic)
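
# The string concatenation above is safe here because protocol names contain no
# characters that need escaping; an equivalent formulation via json.dumps is
# sketched below purely for illustration (it is not part of the source and
# assumes json is imported, as elsewhere in this file).
def _protocol_record_sketch(key, value):
    # Build one protocols_statistics record as a single JSON line
    return json.dumps({"@type": "protocols_statistics",
                       "protocol": key,
                       "flows": value[0],
                       "packets": value[1],
                       "bytes": value[2]}) + "\n"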
def process_results(results, producer, topic, window_duration):
    """
    Check whether an attack was already reported, or whether additional flows were
    detected for the same attack, and report it.

    :param results: flows that should be reported as an attack
    :param producer: producer that sends the data
    :param topic: name of the receiving Kafka topic
    :param window_duration: window size (in seconds)
    """
    output_json = ""
    # Transform given results into the JSON
    for key, value in results.iteritems():
        if key in attDict:
            # If there are additional flows for the attack that was already reported
            if (attDict[key][1] + window_duration * 1000) <= value[3]:
                attDict[key] = (attDict[key][0] + value[0], value[3])
                output_json += get_output_json(key, value, attDict[key][0])
        else:
            attDict[key] = (value[0], value[3])
            output_json += get_output_json(key, value, value[0])

    # Check if there are any results
    if output_json:
        # Print results to standard output
        cprint(output_json)

        # Check if dictionary cleaning is necessary
        clean_old_data_from_dictionary(window_duration)

        # Send results to the specified Kafka topic
        kafkaIO.send_data_to_kafka(output_json, producer, topic)
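
# Worked example of the re-reporting condition above (illustrative values):
# with attDict[key] = (10, 1500000) and window_duration = 10, a new batch with
# value[3] = 1512000 satisfies 1500000 + 10 * 1000 <= 1512000, so the attack is
# reported again with the accumulated flow count 10 + value[0].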
def process_results(results, producer, output_topic):
    """
    Format and report computed statistics.

    :param results: Computed statistics
    :param producer: Producer that sends the data
    :param output_topic: Name of the receiving Kafka topic
    """
    # Dictionary to store all data for a given statistic
    statistics = {}
    for key, value in results.iteritems():
        # Get statistic name (last element of the key)
        statistic_type = key[-1]

        # Create an empty list if statistic type is not in the statistics dictionary
        if statistic_type not in statistics:
            statistics[statistic_type] = []

        # Get data part in JSON string format
        if statistic_type == "queried_by_ip":
            data = {"key": key[0], "value": value, "ip": key[1]}
        elif (statistic_type == "queried_domain") and (value == 1):
            # Skip queried domains with only one occurrence
            continue
        else:
            data = {"key": key[0], "value": value}

        # Append data to the statistics dictionary
        statistics[statistic_type].append(data)

    # Create all statistics JSONs in string format
    output_json = ""
    for statistic_type, data in statistics.iteritems():
        # Select only the Top 100 data elements to reduce the volume of data in the database
        if statistic_type in ["queried_domain", "nonexisting_domain", "queried_by_ip"]:
            data.sort(key=lambda stat: stat['value'], reverse=True)
            data_array = json.dumps(data[:100])
        else:
            data_array = json.dumps(data)

        output_json += "{\"@type\": \"dns_statistics\", \"@stat_type\": \"" + statistic_type + "\", " + \
                       "\"data_array\": " + data_array + "}\n"

    if output_json:
        # Print data to standard output
        print(output_json)

        # Send results to the specified Kafka topic
        kafkaIO.send_data_to_kafka(output_json, producer, output_topic)
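
# Illustrative example of one output line produced above; the concrete key and
# value contents are assumptions, only the envelope is fixed by the code:
#   {"@type": "dns_statistics", "@stat_type": "queried_by_ip",
#    "data_array": [{"key": "example.com", "value": 12, "ip": "10.0.0.1"}]}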
def process_results(data_to_process, producer, output_topic):
    """
    Process analyzed data and modify it into desired output.

    :param data_to_process: analyzed data
    :param producer: Kafka producer
    :param output_topic: Kafka topic through which output is sent
    """
    # Here you can format your results output and send it to the kafka topic
    # <-- INSERT YOUR CODE HERE

    # Example of a transformation function that selects values of the dictionary and dumps them as a string
    results_output = '\n'.join(map(json.dumps, data_to_process.values()))

    # Send desired output to the output_topic
    kafkaIO.send_data_to_kafka(results_output, producer, output_topic)
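
# Example of what the transformation above produces, assuming data_to_process
# is a dictionary of JSON-serializable values (illustrative data; line order
# follows dictionary iteration order):
#   data_to_process = {"a": {"flows": 1}, "b": {"flows": 2}}
#   '\n'.join(map(json.dumps, data_to_process.values()))
#   # -> '{"flows": 1}\n{"flows": 2}'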
def process_results(results, producer, output_topic):
    """
    Format and report detected records.

    :param results: Detected records
    :param producer: Kafka producer that sends the data to the output topic
    :param output_topic: Name of the receiving Kafka topic
    """
    output_json = ""
    # Transform given results into the JSON
    for key, value in results.iteritems():
        output_json += get_output_json(key, value)

    if output_json:
        # Print data to standard output
        cprint(output_json)

        # Send results to the specified Kafka topic
        kafkaIO.send_data_to_kafka(output_json, producer, output_topic)
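
# get_output_json() is used here and in the detection variants above, but it is
# not defined in this section and its signature differs between modules. The
# sketch below is purely illustrative: the field names are assumptions, and the
# only property the callers rely on is that it returns one JSON line per record.
def _get_output_json_sketch(key, value):
    # Emit one detection record as a single JSON line (field names assumed)
    return json.dumps({"key": key, "value": value}) + "\n"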
def process_results(data_to_process, producer, output_topic):
    """
    Process analyzed data and modify it into desired output.

    :param data_to_process: analyzed data
    :param producer: Kafka producer
    :param output_topic: Kafka topic through which output is sent
    """
    # Get half size of the distribution array
    mid = int(len(configuration['distance']['distribution']['default']['intervals']) / 2)

    # Here you can format your results output and send it to the kafka topic
    output_jsons = ""
    for result in data_to_process.values():
        # Check if the result should be reported and, if so, get the distributions sum
        report, distributions_sum = check_if_report(result, configuration)
        if not report:
            continue

        output_json = {}
        output_json['@type'] = 'pattern_finder'
        output_json['configuration'] = configuration['configuration']['name']
        output_json.update(result['output'])
        output_json['data_array'] = []

        highest_distribution_sum = 0
        closest_patterns = []
        for name, distribution in distributions_sum.items():
            output_json['data_array'].append({'name': name, 'distribution': distribution})

            left = sum(distribution[:mid])
            right = sum(distribution[mid:])
            # Use the pattern-specific limit if defined, otherwise fall back to the default limit
            distribution_conf = configuration['distance']['distribution']
            limit = (distribution_conf.get(name) or distribution_conf['default']).get('limit') \
                or distribution_conf['default'].get('limit')

            # Check if the sum of the left side reaches the given limit and exceeds the right side
            if left >= limit and left > right:
                if left == highest_distribution_sum:
                    closest_patterns.append(name)
                elif left > highest_distribution_sum:
                    closest_patterns = [name]
                    highest_distribution_sum = left

        output_json['closest_patterns'] = closest_patterns
        output_jsons += json.dumps(output_json) + '\n'

    # Check if there are any results
    if output_jsons:
        # Print current time in the same format as Spark
        cprint('-------------------------------------------')
        cprint('Time: ' + datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
        cprint('-------------------------------------------')

        # Print results to standard output
        cprint(output_jsons)

        # Send desired output to the output_topic
        kafkaIO.send_data_to_kafka(output_jsons, producer, output_topic)
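
# Worked example of the left/right split above (illustrative numbers):
# with distribution = [5, 3, 1, 0] and mid = 2, left = 5 + 3 = 8 and
# right = 1 + 0 = 1; for any limit <= 8 the pattern qualifies, because
# left >= limit and left > right both hold.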
def process_results(data_to_process, producer, output_topic, count_top_n):
    """
    Process analyzed data and modify it into desired output.

    :param data_to_process: analyzed data in a format:
        (src IP, IPStats([PortStats], [DstIPStats], [HTTPHostStats]))
    :param producer: Kafka producer
    :param output_topic: Kafka topic through which output is sent
    :param count_top_n: integer of N in TopN

    JSON format:
    {"src_ip": "<host src IPv4 address>",
     "@type": "host_stats_topn_ports",
     "stats": {
        "top_n_dst_ports": {
            "0": {"port": <port #1>, "flows": <# of flows>},
            ...
            "n": {"port": <port #n>, "flows": <# of flows>}
        },
        "top_n_dst_hosts": {
            "0": {"dst_host": <dst_host #1>, "flows": <# of flows>},
            ...
            "n": {"dst_host": <dst_host #n>, "flows": <# of flows>}
        },
        "top_n_http_dst": {
            "0": {"dst_host": <dst_host #1>, "flows": <# of flows>},
            ...
            "n": {"dst_host": <dst_host #n>, "flows": <# of flows>}
        }
     }
    }
    """
    for ip, ip_stats in data_to_process.iteritems():
        # Define output keys for particular stats in X_Stats named tuples
        port_data_dict = {
            "top_n_dst_ports": ip_stats.ports,
            "top_n_dst_hosts": ip_stats.dst_ips,
            "top_n_http_dst": ip_stats.http_hosts
        }

        # Take top n entries from the IP's particular stats sorted by the flows param
        port_data_dict = {key: _sort_by_flows(val_list)[:count_top_n]
                          for (key, val_list) in port_data_dict.iteritems()}

        # Parse the stats from StatsItem to a desirable form
        port_data_dict = {key: _parse_stats_items_list(val_list)
                          for (key, val_list) in port_data_dict.iteritems()}

        # Construct the output object in the predefined format
        result_dict = {"@type": "top_n_host_stats", "src_ip": ip, "stats": port_data_dict}

        # Dump the result and send it to the output_topic
        results_output = json.dumps(result_dict) + "\n"
        kafkaIO.send_data_to_kafka(results_output, producer, output_topic)

    # Logging terminal output
    print("%s: Stats of %s IPs parsed and sent" % (time.strftime("%c"), len(data_to_process.keys())))
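
# The helpers _sort_by_flows() and _parse_stats_items_list() are referenced above
# but not defined in this section. Minimal sketches follow, assuming StatsItem is
# a namedtuple with 'key' and 'flows' fields; the real helper presumably emits
# the per-type field names from the docstring ("port", "dst_host"), while these
# sketches use a generic "key" field for brevity.
def _sort_by_flows_sketch(stats_items):
    # Sort stats entries by their flow count, highest first
    return sorted(stats_items, key=lambda item: item.flows, reverse=True)


def _parse_stats_items_list_sketch(stats_items):
    # Map each StatsItem to the {"<index>": {...}} layout from the docstring
    return {str(i): {"key": item.key, "flows": item.flows}
            for i, item in enumerate(stats_items)}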
def process_results(data_to_process, producer, output_topic):
    """
    Process analyzed data and modify it into desired output.

    :param data_to_process: analyzed data
    :param producer: Kafka producer
    :param output_topic: Kafka topic through which output is sent
    """
    result_os = {"@type": "tls_classification", "@stat_type": "os", "data_array": []}
    result_browser = {"@type": "tls_classification", "@stat_type": "browser", "data_array": []}
    result_application = {"@type": "tls_classification", "@stat_type": "application", "data_array": []}

    # Map computed statistics to the results map
    for key, value in data_to_process.iteritems():
        type_classified = key.split(";")
        if type_classified[0] == "os":
            result_os["data_array"].append({"key": type_classified[1], "value": value})
        elif type_classified[0] == "browser":
            result_browser["data_array"].append({"key": type_classified[1], "value": value})
        elif type_classified[0] == "application":
            result_application["data_array"].append({"key": type_classified[1], "value": value})

    # Add counts for Unknown keys
    for result in result_os, result_browser, result_application:
        unknown_value_present = False
        for result_data in result["data_array"]:
            if result_data["key"] == "Unknown" or result_data["key"] == "Unknown:Unknown":
                unknown_value_present = True
                result_data["value"] += data_to_process["count"]
        if not unknown_value_present:
            if result["@stat_type"] == "application":
                result["data_array"].append({"key": "Unknown:Unknown", "value": data_to_process["count"]})
            else:
                result["data_array"].append({"key": "Unknown", "value": data_to_process["count"]})

    # Concatenate all results into one output
    output_json = json.dumps(result_os) + "\n" + json.dumps(result_browser) + "\n" \
        + json.dumps(result_application) + "\n"
    print(output_json)

    # Send desired output to the output_topic
    kafkaIO.send_data_to_kafka(output_json, producer, output_topic)
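
# Note: the loop above implies that data_to_process carries a special "count"
# entry (its key does not split into a "<type>;<name>" pair, so it never lands
# in a data_array); judging from its use, it holds the number of flows that
# could not be classified and is added to each "Unknown" bucket. Illustrative
# example of one resulting output line (values assumed):
#   {"@type": "tls_classification", "@stat_type": "os",
#    "data_array": [{"key": "Windows", "value": 42}, {"key": "Unknown", "value": 7}]}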
def process_results(data_to_process, producer, output_topic):
    """
    Transform given computation results into the JSON format and send them to the specified Kafka instance.

    JSON format:
    {"src_ip": "<host src IPv4 address>",
     "@type": "host_stats",
     "stats": {
        "total": {"packets": <# of packets>, "bytes": <# of bytes>, "flow": <# of flows>},
        "avg_flow_duration": <avg. duration of flows>,
        "dport_count": <number of distinct destination ports>,
        "peer_number": <number of distinct communication peers>,
        "tcp_flags": {"FIN": <number of FIN flags>, "SYN": <number of SYN flags>, ...}
     }
    }

    :param data_to_process: Map in the following format
        (src IP, (('total_stats', <# of flows>, <# of packets>, <# of bytes>),
                  ('peer_number', <# of peers>),
                  ('dport_count', <# of distinct ports>),
                  ('avg_flow_duration', <average flow duration>),
                  ('tcp_flags', <bitarray of tcp flags>)))
    :param producer: Initialized Kafka producer
    :param output_topic: Name of the output topic for Kafka
    """
    results = ""
    for ip, data in data_to_process.iteritems():
        total_dict = {}
        stats_dict = {"total": total_dict}
        result_dict = {"@type": "host_stats", "src_ip": ip, "stats": stats_dict}

        # Process total stats
        # TODO: total_stats_position is not defined in this module. If you need it, define it here or pass it as an argument.
        total_dict["flow"] = data[statistics_position["total_stats"]][total_stats_position["total_flows"]]
        total_dict["packets"] = data[statistics_position["total_stats"]][total_stats_position["total_packets"]]
        total_dict["bytes"] = data[statistics_position["total_stats"]][total_stats_position["total_bytes"]]

        # Process peer_number stats
        stats_dict["peer_number"] = data[statistics_position["peer_number"]][peer_number_position["peer_number"]]

        # Process dport_count stats
        stats_dict["dport_count"] = data[statistics_position["dport_count"]][dport_count_position["dport_number"]]

        # Process average flow duration stats
        stats_dict["avg_flow_duration"] = \
            data[statistics_position["average_flow_duration"]][avg_flow_duration_postion["avg_duration"]]

        # Process TCP flags sums if statistics exist for a given host
        if data[statistics_position["tcp_flags"]]:
            stats_dict["tcp_flags"] = map_tcp_flags(
                data[statistics_position["tcp_flags"]][tcp_flags_position["tcp_flags_array"]])

        results += json.dumps(result_dict) + "\n"

    # Send desired output to the output_topic
    kafkaIO.send_data_to_kafka(results, producer, output_topic)
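
# map_tcp_flags() is referenced above but not defined in this section. A minimal
# sketch follows, assuming the input array holds per-flag counts ordered from
# FIN to CWR (standard TCP flag order); that ordering is an assumption, not
# taken from the source.
def _map_tcp_flags_sketch(tcp_flags_array):
    flag_names = ["FIN", "SYN", "RST", "PSH", "ACK", "URG", "ECE", "CWR"]
    # Pair each flag name with its aggregated count from the array
    return dict(zip(flag_names, tcp_flags_array))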