def test_is_private():
    """Exercise is_private() on IPv4 private/public ranges and IPv6-style prefixes.

    Fix: replaced the non-idiomatic `assert x == True` / `assert x == False`
    comparisons (flake8 E712) with plain truthiness assertions, and dropped
    the redundant intermediate variable.
    """
    # RFC 1918 IPv4 private ranges
    assert is_private('192.168.0.1')
    assert not is_private('192.169.0.1')   # just outside 192.168.0.0/16
    assert is_private('10.0.0.1')
    assert is_private('172.16.0.1')        # start of 172.16.0.0/12
    assert not is_private('172.33.0.1')    # just outside 172.16.0.0/12 (ends at 172.31)
    assert not is_private('12.33.0.1')     # ordinary public address

    # IPv6 link-local (fe80::/10) and unique-local (fd00::/8) prefixes
    # NOTE(review): these are prefix fragments, not full addresses —
    # presumably is_private() matches on string prefix; confirm.
    assert is_private('fe80:00:1')
    assert is_private('fd80:00:1')
    assert not is_private('21e0:fe80:00:1')
def extract_features(session_dict, capture_source=None, max_port=None):
    '''
    Extracts netflow level features from a dictionary of sessions.

    Bug fix: `max_port` was unconditionally reset to 1024 immediately after
    the parameter was read, silently ignoring any caller-supplied value.
    It is now only defaulted when the caller passes None.

    Args:
        session_dict: Mapping of session keys (a pair of 'address:port'
                      endpoint strings) to lists of (timestamp, packet)
                      entries; only the first packet of each session is
                      inspected for MAC addresses.
        capture_source: MAC or IP identifying the capture point. When None,
                        it is inferred via get_source() as the most-used
                        address in the sessions.
        max_port: Maximum port number to build per-port counters for
                  (defaults to 1024).

    Returns:
        feature_vector: 1-D numpy array — normalized per-port session counts
                        (source/dest x initiated/received) followed by eight
                        ratio features (external/tcp/udp/icmp for each
                        direction).
        capture_source: The (possibly inferred) capture source.
        other_ips: List of private peer addresses the capture source
                   exchanged sessions with.
    '''
    address_type = 'MAC'
    if max_port is None:
        max_port = 1024

    # If the capture source isn't specified, default to the most used address
    if capture_source is None:
        capture_source = get_source(session_dict, address_type=address_type)

    # Per-port counters, one slot per port below max_port, split by
    # direction (initiated by vs. received by the capture source).
    num_sport_init = [0] * max_port
    num_dport_init = [0] * max_port
    num_sport_rec = [0] * max_port
    num_dport_rec = [0] * max_port

    num_sessions_init = 0
    num_external_init = 0
    num_tcp_sess_init = 0
    num_udp_sess_init = 0
    num_icmp_sess_init = 0

    num_sessions_rec = 0
    num_external_rec = 0
    num_tcp_sess_rec = 0
    num_udp_sess_rec = 0
    num_icmp_sess_rec = 0

    # Iterate over all sessions and aggregate the info
    other_ips = defaultdict(int)
    for key, session in session_dict.items():
        address_1, port_1 = get_ip_port(key[0])
        address_2, port_2 = get_ip_port(key[1])

        # Get the first packet and grab the macs from it
        first_packet = session[0][1]
        source_mac, destination_mac = extract_macs(first_packet)

        # If the source is the capture source
        if (source_mac == capture_source
                or address_1 == capture_source):
            if is_private(address_2):
                other_ips[address_2] += 1

            num_sessions_init += 1
            num_external_init += is_external(address_1, address_2)
            # Protocol numbers as hex strings: 06=TCP, 11=UDP, 01=ICMP
            num_tcp_sess_init += is_protocol(session, '06')
            num_udp_sess_init += is_protocol(session, '11')
            num_icmp_sess_init += is_protocol(session, '01')

            if int(port_1) < max_port:
                num_sport_init[int(port_1)] += 1
            if int(port_2) < max_port:
                num_dport_init[int(port_2)] += 1

        # If the destination is the capture source
        if (destination_mac == capture_source
                or address_2 == capture_source):
            if is_private(address_1):
                other_ips[address_1] += 1

            num_sessions_rec += 1
            num_external_rec += is_external(address_2, address_1)
            num_tcp_sess_rec += is_protocol(session, '06')
            num_udp_sess_rec += is_protocol(session, '11')
            num_icmp_sess_rec += is_protocol(session, '01')

            if int(port_1) < max_port:
                num_sport_rec[int(port_1)] += 1
            if int(port_2) < max_port:
                num_dport_rec[int(port_2)] += 1

    num_port_sess = np.concatenate(
        (num_sport_init, num_dport_init, num_sport_rec, num_dport_rec),
        axis=0
    )

    # Clamp session counts to at least 1 to avoid division by zero below
    if num_sessions_init == 0:
        num_sessions_init += 1
    if num_sessions_rec == 0:
        num_sessions_rec += 1

    num_port_sess = np.asarray(num_port_sess) / \
        (num_sessions_init + num_sessions_rec)

    extra_features = [0] * 8
    extra_features[0] = num_external_init / num_sessions_init
    extra_features[1] = num_tcp_sess_init / num_sessions_init
    extra_features[2] = num_udp_sess_init / num_sessions_init
    extra_features[3] = num_icmp_sess_init / num_sessions_init
    extra_features[4] = num_external_rec / num_sessions_rec
    extra_features[5] = num_tcp_sess_rec / num_sessions_rec
    extra_features[6] = num_udp_sess_rec / num_sessions_rec
    extra_features[7] = num_icmp_sess_rec / num_sessions_rec

    feature_vector = np.concatenate((num_port_sess, extra_features), axis=0)

    return feature_vector, capture_source, list(other_ips.keys())
# NOTE(review): this is a bare fragment of a function body — it references
# names bound outside the visible span (reps, timestamps, source_ip, model,
# preds, others, model_hash) and largely duplicates the prediction branch of
# eval_rnn below. Presumably the interior of another evaluation routine;
# confirm its enclosing definition before changing it.
logger.debug("Generating predictions")

# Fold the new representations into the previously stored state for this
# source, then classify the smoothed representation.
last_update, prev_rep = get_previous_state(source_ip, timestamps[0])
_, mean_rep = average_representation(
    reps,
    timestamps,
    prev_representation=prev_rep,
    last_update=last_update
)
mean_preds = model.classify_representation(mean_rep)

# Extra CLI args enable verbose per-label prediction logging
if len(sys.argv) > 2:
    for p in mean_preds:
        logger.debug(p)

# Update the stored representation — only for private source addresses
current_rep, avg_rep = None, None
if reps is not None and is_private(source_ip):
    logger.debug("Updating stored data")
    # NOTE(review): model_hash is not defined in this fragment — presumably
    # bound earlier in the enclosing function or at module level; confirm.
    current_rep, avg_rep = update_data(
        source_ip, reps, timestamps, preds, others, model_hash)

# Get the sessions that the model looked at
sessions = model.sessions

# Clean the sessions
clean_sessions = []
inferred_ip = None
for session_dict in sessions:
    # NOTE(review): the result is assigned to `cleaned_sessions` but never
    # appended to `clean_sessions`, so only the last iteration's inferred_ip
    # survives and the accumulator list stays empty — looks like a bug.
    cleaned_sessions, inferred_ip = \
        clean_session_dict(
            session_dict,
            source_address=source_ip
        )
def eval_rnn(pcap, input_label=None):
    '''
    Evaluate the abnormality RNN model on a single pcap.

    Bug fix: the call to model.get_representation() passed `pcap_path`,
    an undefined name — the parameter is `pcap` — so the function raised
    NameError on every invocation.

    Args:
        pcap: Path to the pcap file to evaluate.
        input_label: Optional known device label. When given, the label
                     vector fed to the RNN is one-hot on this label;
                     otherwise the classifier's predicted confidences
                     are used.

    Side effects:
        Prints one abnormality score per cleaned session to stdout and,
        for private source IPs, updates the stored representation.
    '''
    # Load the classifier that produces representations and predictions.
    # NOTE(review): despite the function name, the representation model
    # loaded here is a RandomForest; the RNN is loaded further below.
    load_path = os.path.join('models', 'RandomForestModel.pkl')
    model = RandomForestModel(duration=None)
    model.load(load_path)

    # Get representations from the model (was `pcap_path` — NameError)
    reps, source_ip, timestamps, preds, others = model.get_representation(
        pcap, source_ip=None, mean=False)

    if preds is not None:
        logger.debug("Generating predictions")
        last_update, prev_rep = get_previous_state(source_ip, timestamps[0])
        _, mean_rep = average_representation(
            reps,
            timestamps,
            prev_representation=prev_rep,
            last_update=last_update
        )
        mean_preds = model.classify_representation(mean_rep)
        # Extra CLI args enable verbose per-label prediction logging
        if len(sys.argv) > 2:
            for p in mean_preds:
                logger.debug(p)

        # Update the stored representation — only for private sources
        current_rep, avg_rep = None, None
        if reps is not None and is_private(source_ip):
            logger.debug("Updating stored data")
            # NOTE(review): model_hash is not defined in this function —
            # presumably a module-level global; confirm.
            current_rep, avg_rep = update_data(
                source_ip, reps, timestamps, preds, others, model_hash)

        # Get the sessions that the model looked at and clean them
        sessions = model.sessions
        clean_sessions = []
        inferred_ip = None
        clean_sessions, inferred_ip = \
            clean_session_dict(
                sessions,
                source_address=None
            )
        if source_ip is None:
            source_ip = inferred_ip

        # Build the (label, confidence) input vector for the RNN.
        # NOTE(review): `labels` is not defined here — presumably a
        # module-level list of all known labels; confirm.
        pred_labels = {l[0]: l[1] for l in mean_preds}
        if input_label is None:
            # Use predicted confidences, defaulting unseen labels to 0
            L_in = [(l, pred_labels.get(l, 0)) for l in labels]
        else:
            # One-hot on the supplied label
            L_in = [(l, 0) for l in labels if l != input_label]
            L_in.append((input_label, 1))

        # Use the RNN model to compute abnormality scores
        rnnmodel = AbnormalDetector(num_labels=len(labels))
        rnnpath = os.path.join('models', 'AbnormalRNN')
        rnnmodel.load(rnnpath)
        for session_dict in clean_sessions:
            for k, session in session_dict.items():
                X, L = create_inputs(L_in, session, 116)
                score = rnnmodel.get_output(X, L)
                print(k, score[0, 0])