def read_ossec_minimal_json(filename):
    """Parse a file of newline-delimited OSSEC alerts into Alert objects.

    Each line is a JSON object. A minimal alert dict (timestamp, srcip,
    dstip, srcport, dstport, proto, class) is extracted per line, preferring
    structured fields under entry['data'] and falling back to parsing the
    raw 'full_log' string. Returns (alerts, timestamps) as parallel lists.
    """
    alerts = []
    timestamps = []
    with open(filename) as f:
        for line in f:
            entry = json.loads(line)
            full_log = entry['full_log']
            # Structured fields are optional; fall back to {} so lookups below are uniform.
            data = entry['data'] if 'data' in entry else {}
            rule = entry['rule']
            alert = {'timestamp': parser.parse(entry['timestamp']).timestamp()}
            # IPs from structured data may carry trailing delimiters; keep only the first token.
            if 'srcip' in data:
                alert['srcip'] = re.split(delimiters, data['srcip'])[0]
            else:
                alert['srcip'] = get_src_ip(full_log)
            if 'dstip' in data:
                alert['dstip'] = re.split(delimiters, data['dstip'])[0]
            else:
                alert['dstip'] = get_dst_ip(full_log)
            # Ports are always recovered from the raw log, anchored on the resolved IPs.
            alert['srcport'] = get_src_port(full_log, alert['srcip'])
            alert['dstport'] = get_dst_port(full_log, alert['dstip'])
            alert['proto'] = data['proto'] if 'proto' in data else ''
            alert['class'] = rule['description'] if 'description' in rule else ''
            alert_obj = Alert(alert)
            alert_obj.file = filename
            alerts.append(alert_obj)
            timestamps.append(alert['timestamp'])
    return alerts, timestamps
def read_aminer_json(filename):
    """Parse a file containing a single JSON array of AMiner alerts.

    Wraps each alert dict in an Alert object tagged with its source file
    and collects the first timestamp of each alert's LogData.
    Returns (alerts, timestamps) as parallel lists.
    """
    with open(filename) as f:
        json_alerts = json.load(f)
    alerts = []
    timestamps = []
    for entry in json_alerts:
        alert_obj = Alert(entry)
        alert_obj.file = filename
        alerts.append(alert_obj)
        timestamps.append(entry['LogData']['Timestamps'][0])
    return alerts, timestamps
def read_ossec_full_json(filename):
    """Parse newline-delimited OSSEC alerts, keeping the full JSON of each.

    Blank lines are skipped. Every remaining line becomes an Alert tagged
    with its source file; timestamps are parsed to epoch seconds from the
    alert's 'timestamp' field. Returns (alerts, timestamps).
    """
    alerts = []
    timestamps = []
    with open(filename) as f:
        for raw_line in f:
            # Ignore empty lines (e.g., trailing newline at end of file).
            if not raw_line.strip('\n\r'):
                continue
            alert_obj = Alert(json.loads(raw_line))
            alert_obj.file = filename
            alerts.append(alert_obj)
            timestamps.append(parser.parse(alert_obj.d['timestamp']).timestamp())
    return alerts, timestamps
def create_bag_of_alerts(self, threshold, max_val_limit, min_key_occurrence, min_val_occurrence):
    # Computes a bag-of-alerts model for this group.
    #
    # Greedily clusters self.alerts: each alert is compared against all
    # templates seen so far and joined to the most similar one if that
    # similarity reaches `threshold`; otherwise it starts a new cluster with
    # itself as template. Each cluster is then merged into a single
    # representative Alert. Populates:
    #   self.bag_of_alerts - merged Alert -> number of alerts in its cluster
    #   self.merge_seq     - merged Alert per original alert, in input order
    self.bag_of_alerts = { }  # Remove current bag_of_alerts, create new one from current alerts
    self.merge_seq = [ ]  # Remove current merge_seq, create new one from current alerts
    alerts_to_merge = {}  # template alert -> list of raw alert dicts assigned to it
    alerts_template_dict = {}  # original alert -> template of the cluster it joined
    for alert in self.alerts:
        # Find the most similar existing template; `>=` means ties keep the
        # template encountered last in iteration order.
        max_s = -1
        best_matching_alert_template = None
        for alert_template in alerts_to_merge:
            s = similarity.similarity.get_json_similarity(alert.d, alert_template.d)
            if s >= max_s:
                max_s = s
                best_matching_alert_template = alert_template
        if max_s >= threshold:
            # Similar enough: join the best-matching cluster.
            alerts_to_merge[best_matching_alert_template].append(alert.d)
            alerts_template_dict[alert] = best_matching_alert_template
        else:
            # No sufficiently similar template: this alert founds a new cluster.
            alerts_to_merge[alert] = [alert.d]
            alerts_template_dict[alert] = alert
    # Merge every cluster into one representative Alert and record its size.
    template_merge_dict = {}  # template -> merged Alert, for building merge_seq below
    for template, alerts_to_merge_list in alerts_to_merge.items():
        merge = Alert(merging.merge.merge_json(alerts_to_merge_list, max_val_limit, min_key_occurrence, min_val_occurrence))
        self.bag_of_alerts[merge] = len(alerts_to_merge_list)
        template_merge_dict[template] = merge
    # Rebuild the alert sequence with each alert replaced by its merged representative.
    for alert in self.alerts:
        self.merge_seq.append(template_merge_dict[alerts_template_dict[alert]])
def merge_exact(groups, min_alert_match_similarity=0.0, max_val_limit=None, min_key_occurrence=0.0, min_val_occurrence=0.0, group_max_alerts=None):
    # Merge several groups position-wise into one list of merged alerts.
    #
    # Strategy notes from the original author:
    # Compute similarities for each combination of alerts between group and merge
    # Connect the ones with the highest of all, proceed with second highest, etc.
    # Skip the ones that are already taken and proceed with next ones.
    # Add the ones that are left as new alerts in merge.
    # Also take offset in positions into account when computing similarity (timing?), might help with collisions even if just small weight
    if group_max_alerts is None:  # In case that it was already computed before (e.g., in merge_group), do not compute largest group again
        max_alerts = -1
        # Find largest group, because it has the most alerts to be matched to
        for group in groups:
            if len(group.alerts) > max_alerts:
                group_max_alerts = group
                max_alerts = len(group.alerts)
    alerts_to_merge = {}  # dict holding all lists of alerts to be merged
    # Seed the merge targets with every alert of the largest group.
    for alert in group_max_alerts.alerts:
        alerts_to_merge[alert] = [alert]
    for group in groups:
        if group == group_max_alerts:
            # Alerts in this group were already added to alerts_to_merge
            continue
        else:
            # Ranked matching between this group's alerts and the current merge targets.
            alert_matching = similarity.find_alert_matching(group.alerts, alerts_to_merge, early_stopping_threshold=0.0, w=None, min_alert_match_similarity=min_alert_match_similarity)
            used_m = []  # merge-side alerts already claimed
            used_g = []  # group-side alerts already assigned
            for alert_g, alert_m in alert_matching:
                if alert_m not in used_m and alert_g not in used_g:  # Make sure that each alert_g points to one alert_m
                    used_m.append(alert_m)
                    used_g.append(alert_g)
                    alerts_to_merge[alert_m].append(alert_g)
            # In case that alerts in group were not matched (e.g., all matching similarities < min_alert_match_similarity), add them to merge as single alerts
            missing_alerts = set(group.alerts) - set(used_g)
            for missing_alert in missing_alerts:
                alerts_to_merge[missing_alert] = [missing_alert]
    # Collapse each matched list of alerts into one merged Alert.
    merged_alerts = []
    for alerts in alerts_to_merge.values():
        json_alerts = []
        for alert in alerts:
            json_alerts.append(alert.d)
        merged_alert = Alert(merge_json(json_alerts, max_val_limit=max_val_limit, min_key_occurrence=min_key_occurrence, min_val_occurrence=min_val_occurrence))
        merged_alerts.append(merged_alert)
    return merged_alerts
def merge_bag(groups, min_alert_match_similarity=0.0, max_val_limit=None, min_key_occurrence=0.0, min_val_occurrence=0.0):
    # Merge the bag-of-alerts models of several groups.
    #
    # Alert patterns across groups are matched pairwise (anchored on the group
    # with the most patterns), each matched set is merged into one pattern, and
    # the observed frequencies are summarized as a single int (if constant) or
    # a (min, max) tuple. Returns:
    #   merged_bag_of_alerts - merged pattern -> frequency or (min, max)
    #   merged_alerts        - merged patterns repeated ~avg frequency times (for comparisons)
    #   merged_bags          - merged pattern -> list of original patterns it covers
    #   merged_bags_inv      - original pattern -> merged pattern
    max_alert_patterns = -1
    group_max_alert_patterns = None
    # Find largest group, because it has the most alert patterns to be matched to
    for group in groups:
        if len(group.bag_of_alerts) > max_alert_patterns:
            group_max_alert_patterns = group
            max_alert_patterns = len(group.bag_of_alerts)
    # Seed merge targets with the anchor group's patterns and their frequencies.
    alert_patterns_to_merge = {}  # pattern -> list of (pattern, frequency) tuples to merge
    for alert_pattern, freq in group_max_alert_patterns.bag_of_alerts.items():
        alert_patterns_to_merge[alert_pattern] = [(alert_pattern, freq)]
    for group in groups:
        if group == group_max_alert_patterns:
            # Alert patterns in this group were already added to alert_patterns_to_merge
            continue
        else:
            alert_matching = similarity.find_alert_matching(group.bag_of_alerts, alert_patterns_to_merge, early_stopping_threshold=0.0, w=None, min_alert_match_similarity=min_alert_match_similarity)  # Early stopping threshold should not be used for bag matching, since it does not correspond to grouping criteria
            used_m = []  # merge-side patterns already claimed
            used_g = []  # group-side patterns already assigned
            for alert_g, alert_m in alert_matching:
                if alert_m not in used_m and alert_g not in used_g:  # Make sure that each alert_g points to one alert_m
                    used_m.append(alert_m)
                    used_g.append(alert_g)
                    alert_patterns_to_merge[alert_m].append((alert_g, group.bag_of_alerts[alert_g]))
            # In case that alerts in group were not matched (e.g., all matching similarities < min_alert_match_similarity), add them to merge as single alerts
            missing_alerts = set(group.bag_of_alerts) - set(used_g)
            for missing_alert in missing_alerts:
                alert_patterns_to_merge[missing_alert] = [(missing_alert, group.bag_of_alerts[missing_alert])]
    merged_bag_of_alerts = {}
    merged_alerts = []
    merged_bags = {}
    merged_bags_inv = {}
    for alert_pattern, alert_tuples in alert_patterns_to_merge.items():
        json_alerts = []  # pattern dicts to merge
        frequencies = []  # int or (min, max) tuple per pattern
        raw_alerts = []   # the original pattern objects
        # NOTE(review): the inner loop variable shadows the outer `alert_pattern`;
        # harmless here since the outer value is not used again after this point.
        for alert_pattern, freq in alert_tuples:
            json_alerts.append(alert_pattern.d)
            frequencies.append(freq)
            raw_alerts.append(alert_pattern)
        json_merge = Alert(merge_json(json_alerts, max_val_limit=max_val_limit, min_key_occurrence=min_key_occurrence, min_val_occurrence=min_val_occurrence))
        # Frequencies may be plain ints or (min, max) tuples from earlier merges;
        # fold them all into one overall min/max range.
        first_frequency = frequencies[0]
        # Start values for min max search
        max_frequency = -1
        if type(first_frequency) is int:
            min_frequency = first_frequency
        elif type(first_frequency) is tuple:
            min_frequency = first_frequency[0]
        for frequency in frequencies:
            if type(frequency) is int:
                min_frequency = min(min_frequency, frequency)
                max_frequency = max(max_frequency, frequency)
            elif type(frequency) is tuple:
                min_frequency = min(min_frequency, frequency[0], frequency[1])
                max_frequency = max(max_frequency, frequency[0], frequency[1])
        if min_frequency == max_frequency:
            merged_bag_of_alerts[json_merge] = min_frequency
        else:
            merged_bag_of_alerts[json_merge] = (min_frequency, max_frequency)
        # In case that this group will be used for comparison, add unordered list of alerts according to respective frequencies
        for i in range(math.ceil((max_frequency + min_frequency) / 2)):
            merged_alerts.append(json_merge)
        merged_bags[json_merge] = raw_alerts
        for raw_alert in raw_alerts:
            merged_bags_inv[raw_alert] = json_merge
    return merged_bag_of_alerts, merged_alerts, merged_bags, merged_bags_inv
1 ], "D": { "D1": true, "D2": [ "d2.1", "d2.2", "d2.3" ] } }""" print('Identical') g1 = Group() g1.add_to_group([ Alert(json.loads(base_1)), Alert(json.loads(base_2)), Alert(json.loads(base_3)) ]) g2 = Group() g2.add_to_group( [Alert(json.loads(j_1)), Alert(json.loads(j_2)), Alert(json.loads(j_3))]) createOutput([g1, g2]) print('Change order of two alerts') g1 = Group() g1.add_to_group([ Alert(json.loads(base_1)), Alert(json.loads(base_2)),
], "D": { "D1": true, "D2": [ "d2.1", "d2.2", "d2.3" ] } }""" print('Sample') labels = {} g1 = Group() g1.add_to_group([ Alert(json.loads(base_1)), Alert(json.loads(base_2)), Alert(json.loads(base_3)) ]) labels[g1] = 'g1' g2 = Group() g2.add_to_group( [Alert(json.loads(j_1)), Alert(json.loads(j_2)), Alert(json.loads(j_3))]) labels[g2] = 'g2' g3 = Group() g3.add_to_group([ Alert(json.loads(base_1)), Alert(json.loads(base_2)), Alert(json.loads(base_3)),
def add_alert_to_group(alert, group, times):
    """Register `alert` in `group` with a bag count of `times`.

    The bag-of-alerts entry starts at 1; each additional occurrence appends a
    fresh copy of the alert to both group.alerts and group.merge_seq and
    increments the bag count, so the final count equals max(times, 1).
    """
    group.bag_of_alerts[alert] = 1
    remaining = times - 1
    while remaining > 0:
        # Distinct Alert copies for the alert list and the merge sequence.
        group.alerts.append(Alert(alert.d))
        group.merge_seq.append(Alert(alert.d))
        group.bag_of_alerts[alert] += 1
        remaining -= 1
"C": [ "c", 1 ], "D": { "D1": true, "D2": [ "d2.1", "d2.2", "d2.3" ] } }""" print('Identical') createOutput([Alert(json.loads(base_1)), Alert(json.loads(base_2)), Alert(json.loads(base_3))], [Alert(json.loads(j_1)), Alert(json.loads(j_2)), Alert(json.loads(j_3))]) print('Change order of two alerts') createOutput([Alert(json.loads(base_1)), Alert(json.loads(base_2)), Alert(json.loads(base_3))], [Alert(json.loads(j_2)), Alert(json.loads(j_1)), Alert(json.loads(j_3))]) print('Change order of three alerts') createOutput([Alert(json.loads(base_1)), Alert(json.loads(base_2)), Alert(json.loads(base_3))], [Alert(json.loads(j_2)), Alert(json.loads(j_3)), Alert(json.loads(j_1))]) print('Change value A in one alert') json_1 = json.loads(j_1) json_1['A'] = 'x' createOutput([Alert(json.loads(base_1)), Alert(json.loads(base_2)), Alert(json.loads(base_3))], [Alert(json_1), Alert(json.loads(j_2)), Alert(json.loads(j_3))]) print('Different amounts of alerts') createOutput([Alert(json.loads(base_1)), Alert(json.loads(base_2)), Alert(json.loads(base_3))], [Alert(json.loads(j_1)), Alert(json.loads(j_3))])