Example #1
def test_is_private():
    '''Checks detection of private IPv4 and private/link-local IPv6 addresses.'''
    private = is_private('192.168.0.1')
    assert private == True
    private = is_private('192.169.0.1')
    assert private == False
    private = is_private('10.0.0.1')
    assert private == True
    private = is_private('172.16.0.1')
    assert private == True
    private = is_private('172.33.0.1')
    assert private == False
    private = is_private('12.33.0.1')
    assert private == False
    private = is_private('fe80:00:1')
    assert private == True
    private = is_private('fd80:00:1')
    assert private == True
    private = is_private('21e0:fe80:00:1')
    assert private == False
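
The is_private helper under test is not shown on this page. The following is a minimal prefix-matching sketch that would satisfy the assertions above; it is an assumption for illustration, not the project's actual implementation:

def is_private(address):
    '''Rough private/link-local address check by string prefix (illustrative sketch).'''
    if ':' in address:
        # IPv6: link-local (fe80::/10, approximated) or unique-local (fd00::/8)
        return address.startswith('fe80:') or address.startswith('fd')
    # IPv4 private ranges: 10.0.0.0/8, 192.168.0.0/16, 172.16.0.0/12
    if address.startswith('10.') or address.startswith('192.168.'):
        return True
    if address.startswith('172.'):
        try:
            second_octet = int(address.split('.')[1])
        except (IndexError, ValueError):
            return False
        return 16 <= second_octet <= 31
    return False
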
Example #2
from collections import defaultdict

import numpy as np


def extract_features(session_dict, capture_source=None, max_port=None):
    '''
    Extracts netflow-level features from a dictionary of parsed sessions.

    Args:
        session_dict: dictionary of sessions to process into features
        capture_source: MAC or IP address of the capture source
                        (default: inferred from the sessions via get_source)
        max_port: maximum port to compute per-port features for (default 1024)

    Returns:
        feature_vector: vector containing the featurized representation
                        of the input sessions
        capture_source: the capture source that was used
        other_ips: list of private addresses the capture source talked to
    '''
    address_type = 'MAC'
    if max_port is None:
        max_port = 1024

    # # Get featurization info from config
    # try:
    #     config = get_config()
    #     address_type = config['source identifier']
    #     if max_port is None:
    #         max_port = config['max port']
    # except Exception as e:
    #     address_type = 'MAC'

    # If the capture source isn't specified, default to the most used address
    if capture_source is None:
        capture_source = get_source(session_dict, address_type=address_type)

    # Initialize some counter variables
    num_sport_init = [0] * max_port
    num_dport_init = [0] * max_port
    num_sport_rec = [0] * max_port
    num_dport_rec = [0] * max_port

    num_sessions_init = 0
    num_external_init = 0
    num_tcp_sess_init = 0
    num_udp_sess_init = 0
    num_icmp_sess_init = 0

    num_sessions_rec = 0
    num_external_rec = 0
    num_tcp_sess_rec = 0
    num_udp_sess_rec = 0
    num_icmp_sess_rec = 0

    # Iterate over all sessions and aggregate the info
    other_ips = defaultdict(int)
    for key, session in session_dict.items():
        address_1, port_1 = get_ip_port(key[0])
        address_2, port_2 = get_ip_port(key[1])

        # Get the first packet and grab the macs from it
        first_packet = session[0][1]
        source_mac, destination_mac = extract_macs(first_packet)

        # If the source is the capture source
        if (source_mac == capture_source or address_1 == capture_source):

            if is_private(address_2):
                other_ips[address_2] += 1

            num_sessions_init += 1
            num_external_init += is_external(address_1, address_2)
            num_tcp_sess_init += is_protocol(session, '06')
            num_udp_sess_init += is_protocol(session, '11')
            num_icmp_sess_init += is_protocol(session, '01')

            if int(port_1) < max_port:
                num_sport_init[int(port_1)] += 1

            if int(port_2) < max_port:
                num_dport_init[int(port_2)] += 1

        # If the destination is the capture source
        if (destination_mac == capture_source or address_2 == capture_source):
            if is_private(address_1):
                other_ips[address_1] += 1

            num_sessions_rec += 1
            num_external_rec += is_external(address_2, address_1)
            num_tcp_sess_rec += is_protocol(session, '06')
            num_udp_sess_rec += is_protocol(session, '11')
            num_icmp_sess_rec += is_protocol(session, '01')

            if int(port_1) < max_port:
                num_sport_rec[int(port_1)] += 1
            if int(port_2) < max_port:
                num_dport_rec[int(port_2)] += 1

    num_port_sess = np.concatenate(
        (num_sport_init, num_dport_init, num_sport_rec, num_dport_rec), axis=0)

    if num_sessions_init == 0:
        num_sessions_init += 1
    if num_sessions_rec == 0:
        num_sessions_rec += 1

    num_port_sess = np.asarray(num_port_sess) / \
        (num_sessions_init+num_sessions_rec)

    extra_features = [0] * 8
    extra_features[0] = num_external_init / num_sessions_init
    extra_features[1] = num_tcp_sess_init / num_sessions_init
    extra_features[2] = num_udp_sess_init / num_sessions_init
    extra_features[3] = num_icmp_sess_init / num_sessions_init

    extra_features[4] = num_external_rec / num_sessions_rec
    extra_features[5] = num_tcp_sess_rec / num_sessions_rec
    extra_features[6] = num_udp_sess_rec / num_sessions_rec
    extra_features[7] = num_icmp_sess_rec / num_sessions_rec

    feature_vector = np.concatenate((num_port_sess, extra_features), axis=0)
    return feature_vector, capture_source, list(other_ips.keys())
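
A hypothetical call, assuming session_dict maps ('ip:port', 'ip:port') key tuples to lists of (timestamp, raw packet) pairs, which is the shape the loop above reads:

feature_vector, source, other_ips = extract_features(session_dict)
# With the default max_port of 1024, the vector holds 4 * 1024 per-port counts
# followed by 8 aggregate ratios (external/TCP/UDP/ICMP, initiated and received).
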
Example #3
            logger.debug("Generating predictions")
            last_update, prev_rep = get_previous_state(source_ip,
                                                       timestamps[0])

            _, mean_rep = average_representation(reps,
                                                 timestamps,
                                                 prev_representation=prev_rep,
                                                 last_update=last_update)
            mean_preds = model.classify_representation(mean_rep)
            if len(sys.argv) > 2:
                for p in mean_preds:
                    logger.debug(p)
            # Update the stored representation
            current_rep, avg_rep = None, None
            if reps is not None and is_private(source_ip):
                logger.debug("Updating stored data")
                current_rep, avg_rep = update_data(source_ip, reps, timestamps,
                                                   preds, others, model_hash)

            # Get the sessions that the model looked at
            sessions = model.sessions
            # Clean the sessions
            clean_sessions = []
            inferred_ip = None
            for session_dict in sessions:
                cleaned_sessions, inferred_ip = clean_session_dict(
                    session_dict, source_address=source_ip)
Example #4
import os
import sys


def eval_rnn(pcap, input_label=None):
    '''
    Evaluate the RNN model on a single pcap
    '''
    # logger, labels, model_hash, and the model/helper classes are assumed to
    # be imported at module level in the original source file.
    load_path = os.path.join('models', 'RandomForestModel.pkl')
    model = RandomForestModel(duration=None)
    model.load(load_path)

    # Get representations from the model
    reps, source_ip, timestamps, preds, others = model.get_representation(
        pcap, source_ip=None, mean=False)

    if preds is not None:
        logger.debug("Generating predictions")
        last_update, prev_rep = get_previous_state(source_ip, timestamps[0])
        _, mean_rep = average_representation(reps,
                                             timestamps,
                                             prev_representation=prev_rep,
                                             last_update=last_update)
        mean_preds = model.classify_representation(mean_rep)
        if len(sys.argv) > 2:
            for p in mean_preds:
                logger.debug(p)
        # Update the stored representation
        current_rep, avg_rep = None, None
        if reps is not None and is_private(source_ip):
            logger.debug("Updating stored data")
            current_rep, avg_rep = update_data(source_ip, reps, timestamps,
                                               preds, others, model_hash)

        # Get the sessions that the model looked at
        sessions = model.sessions
        # Clean the sessions
        clean_sessions, inferred_ip = clean_session_dict(
            sessions, source_address=None)

        if source_ip is None:
            source_ip = inferred_ip

        L_in = []
        pred_labels = {l[0]: l[1] for l in mean_preds}
        if input_label is None:
            for l in labels:
                if l not in pred_labels:
                    L_in.append((l, 0))
                else:
                    L_in.append((l, pred_labels[l]))
        else:
            L_in = [(l, 0) for l in labels if l != input_label]
            L_in.append((input_label, 1))

        # Use the RNN model to compute abnormality scores
        rnnmodel = AbnormalDetector(num_labels=len(labels))
        rnnpath = os.path.join('models', 'AbnormalRNN')
        rnnmodel.load(rnnpath)

        for session_dict in clean_sessions:
            for k, session in session_dict.items():
                X, L = create_inputs(L_in, session, 116)
                score = rnnmodel.get_output(X, L)
                print(k, score[0, 0])
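
A hypothetical command-line entry point; the function already inspects len(sys.argv) to decide whether to log every prediction, so a thin wrapper such as the following (the argument handling is a placeholder) is enough to run it:

if __name__ == '__main__':
    # sys.argv[1]: path to the pcap to evaluate; extra arguments enable verbose logging
    eval_rnn(sys.argv[1])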