Example #1
def main(argv):
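    # Reassemble TCP flows on port 80 from the pcap file given in argv[1] and
    # write one byte-frequency row per flow to csv/test.csv.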
    try:
        filename = argv[1]
        protocol = "tcp"
        prt = StreamReaderThread(filename, protocol, "80")
        prt.delete_read_connections = True
        prt.start()
        fcsv = open("csv/test.csv", "w")
        counter = 0

        while not prt.done or prt.has_ready_message():
            if not prt.has_ready_message():
                time.sleep(0.0001)
                continue
            buffered_packets = prt.pop_connection()
            if buffered_packets is not None:
                #print(buffered_packets.get_byte_frequency("client"))
                counter += 1
                byte_frequency = ",".join(
                    str(buffered_packets.get_byte_frequency("server")))
                fcsv.write("{},{},{},{},{}\n".format(
                    buffered_packets.tcp_tuple[0],
                    buffered_packets.tcp_tuple[1],
                    buffered_packets.tcp_tuple[2],
                    buffered_packets.tcp_tuple[3], byte_frequency))
                sys.stdout.write("\r{} flows.".format(counter))
                sys.stdout.flush()

        fcsv.close()

    except IndexError:
        print("Usage: python pcap_to_csv.py <pcap_filename>")
Example #2
def count_byte_freq(filename, protocol, port):
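    # Count flows that carry a non-empty server payload in the given pcap,
    # printing progress and reader-buffer statistics along the way.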
    global prt
    global conf

    read_conf()

    prt = StreamReaderThread(get_pcap_file_fullpath(filename), protocol, port)
    prt.start()
    prt.delete_read_connections = True
    counter = 0
    missed_counter = 0

    while not prt.done or prt.has_ready_message():
        if not prt.has_ready_message():
            # print(1)
            # time.sleep(0.0001)
            missed_counter += 1
            sys.stdout.write(
                "\r1-{} flows. Missed: {}. {} items in buffer. packets: {}. last ts: {}"
                .format(counter, missed_counter, len(prt.tcp_buffer),
                        prt.packet_counter, prt.last_timestamp))
            sys.stdout.flush()
            prt.wait_for_data()
            continue
        else:
            start = time.time()
            buffered_packets = prt.pop_connection()
            end = time.time()
            if buffered_packets is None:
                # print(2)
                # time.sleep(0.0001)
                missed_counter += 1
                sys.stdout.write("\r2-{} flows. Missed: {}. Time: {}".format(
                    counter, missed_counter, end - start))
                sys.stdout.flush()
                prt.wait_for_data()
                continue
            elif buffered_packets.get_payload_length("server") > 0:
                counter += 1
                sys.stdout.write("\r3-{} flows. Missed: {}. Time: {}".format(
                    counter, missed_counter, end - start))
                sys.stdout.flush()
            else:
                missed_counter += 1
                sys.stdout.write("\r4-{} flows. Missed: {}. Time: {}".format(
                    counter, missed_counter, end - start))
                sys.stdout.flush()

    print "Total flows: {}".format(counter)
Example #3
def predict_byte_freq_generator(autoencoder,
                                filename,
                                protocol,
                                port,
                                hidden_layers,
                                activation_function,
                                dropout,
                                phase="training",
                                testing_filename=""):
    global prt
    global threshold

    if prt is None:
        if phase == "testing":
            prt = StreamReaderThread(get_pcap_file_fullpath(testing_filename),
                                     protocol, port)
            print("testing filename: " + testing_filename)
        else:
            prt = StreamReaderThread(get_pcap_file_fullpath(filename),
                                     protocol, port)

        prt.delete_read_connections = True
        prt.start()
    else:
        prt.reset_read_status()
        prt.delete_read_connections = True

    errors_list = []
    counter = 0
    print "predict"

    if phase == "testing":
        t1, t2 = load_threshold(filename, protocol, port, hidden_layers,
                                activation_function, dropout)
        check_directory(filename, "results")
        # fresult = open("results/{}/result-{}-hl{}-af{}-do{}-{}.csv".format(filename, protocol + port, ",".join(hidden_layers), activation_function, dropout, testing_filename), "w")
        open_conn()
        experiment_id = create_experiment(filename, testing_filename, protocol,
                                          port, ",".join(hidden_layers),
                                          activation_function, dropout)
        # if fresult is None:
        #     raise Exception("Could not create file")

    # ftemp = open("results/data.txt", "wb")
    # fcsv = open("results/data.csv", "wb")
    # a = csv.writer(fcsv, quoting=csv.QUOTE_ALL)
    # time.sleep(2)
    i_counter = 0
    # for i in range(0,10):
    while (not prt.done) or (prt.has_ready_message()):
        if not prt.has_ready_message():
            prt.wait_for_data()
        else:
            buffered_packets = prt.pop_connection()
            if buffered_packets is None:
                continue
            if buffered_packets.get_payload_length("server") == 0:
                continue

            i_counter += 1
            # print "{}-{}".format(i_counter, buffered_packets.id)
            # print "{}-{}: {}".format(i_counter, buffered_packets.id, buffered_packets.get_payload("server")[:100])
            byte_frequency = buffered_packets.get_byte_frequency("server")
            # ftemp.write(buffered_packets.get_payload())
            # a.writerow(byte_frequency)
            data_x = numpy.reshape(byte_frequency, (1, 256))
            decoded_x = autoencoder.predict(data_x)
            # a.writerow(decoded_x[0])

            # fcsv.close()
            error = numpy.mean((decoded_x - data_x)**2, axis=1)
            # ftemp.write("\r\n\r\n{}".format(error))
            # ftemp.close()
            if phase == "training" or phase == "predicting":
                errors_list.append(error)
            elif phase == "testing":
                decision = decide(error[0], t1, t2)
                # fresult.write("{},{},{},{},{},{}\n".format(buffered_packets.id, error[0], decision[0], decision[1], decision[2], buffered_packets.get_hexlify_payload()))
                write_results_to_db(experiment_id, buffered_packets, error,
                                    decision)

            counter += 1
            sys.stdout.write("\rCalculated {} connections.".format(counter))
            sys.stdout.flush()

    errors_list = numpy.reshape(errors_list, (1, len(errors_list)))
    if phase == "training" or phase == "predicting":
        save_mean_stdev(filename, protocol, port, hidden_layers,
                        activation_function, dropout, errors_list)
        save_q3_iqr(filename, protocol, port, hidden_layers,
                    activation_function, dropout, errors_list)
        save_median_mad(filename, protocol, port, hidden_layers,
                        activation_function, dropout, errors_list)
    elif phase == "testing":
        # fresult.close()
        return
Example #4
def byte_freq_generator(filename, protocol, port, batch_size):
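    # Generator for autoencoder training: yields (dataX, dataX) batches of
    # 256-bin byte frequencies, one row per flow with a non-empty server payload.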
    global prt
    global conf
    global done
    prt = StreamReaderThread(get_pcap_file_fullpath(filename), protocol, port)
    prt.start()
    counter = 0
    done = False
    dataX = numpy.empty((0, 256))  # keeps the final yield check valid even if no flow arrives

    while not done:
        while not prt.done or prt.has_ready_message():
            if not prt.has_ready_message():
                prt.wait_for_data()
                continue
            else:
                buffered_packets = prt.pop_connection()
                if buffered_packets is None:
                    time.sleep(0.0001)
                    continue
                if buffered_packets.get_payload_length("server") > 0:
                    byte_frequency = buffered_packets.get_byte_frequency(
                        "server")
                    X = numpy.reshape(byte_frequency, (1, 256))

                    if counter == 0 or counter % batch_size == 1:
                        dataX = X
                    else:
                        dataX = numpy.r_["0,2", dataX, X]

                    counter += 1

                    if counter % batch_size == 0:
                        yield dataX, dataX

        if dataX.shape[0] > 0:
            yield dataX, dataX

        prt.reset_read_status()
Example #5
def byte_freq_generator(filename, protocol, port, batch_size):  # a generator: a function whose body contains the yield keyword is no longer an ordinary function
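    # Same generator as in the previous example: yields (dataX, dataX) batches
    # of per-flow byte frequencies for autoencoder training.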
    global prt
    global conf
    global done
    prt = StreamReaderThread(get_pcap_file_fullpath(filename), protocol, port)
    prt.start()  # the reader thread starts running
    counter = 0
    done = False
    dataX = numpy.empty((0, 256))  # keeps the final yield check valid even if no flow arrives

    while not done:
        while not prt.done or prt.has_ready_message():
            if not prt.has_ready_message():  # no message ready yet
                prt.wait_for_data()
                continue
            else:
                buffered_packets = prt.pop_connection()
                if buffered_packets is None:
                    time.sleep(0.0001)
                    continue
                if buffered_packets.get_payload_length("server") > 0:
                    byte_frequency = buffered_packets.get_byte_frequency("server")
                    X = numpy.reshape(byte_frequency, (1, 256))

                    if counter == 0 or counter % batch_size == 1:
                        dataX = X
                    else:
                        dataX = numpy.r_["0,2", dataX, X]  # stack the two matrices vertically (column counts must match), similar to concat() in pandas

                    counter += 1

                    if counter % batch_size == 0:
                        yield dataX, dataX

        if dataX.shape[0] > 0:
            yield dataX, dataX

        prt.reset_read_status()
Example #6
def count_byte_seq_generator(filename, protocol, port, seq_length):
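    # Count how many sliding sequences of length seq_length can be drawn from
    # the server payloads (capped at MAX_SEQ_LEN) across all flows.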
    global prt
    global root_directory

    prt = StreamReaderThread(get_pcap_file_fullpath(filename), protocol, port)
    prt.start()
    prt.delete_read_connections = True
    counter = 0
    stream_counter = 0

    while not prt.done or prt.has_ready_message():
        if not prt.has_ready_message():
            prt.wait_for_data()
            continue
        else:
            buffered_packet = prt.pop_connection()
            if buffered_packet is None:
                prt.wait_for_data()
                continue

            payload_length = buffered_packet.get_payload_length("server")
            if payload_length > MAX_SEQ_LEN:
                payload_length = MAX_SEQ_LEN
            # payload = buffered_packet.get_payload("server")
            # payload = "#" + payload  # mark as beginning of payloads
            # print(payload)
            # x = 0
            # for i in range(0, len(payload) - seq_length, 1):
            #     seq_in = payload[i:i + seq_length]
            #     seq_out = payload[i + seq_length]
            #     print(seq_in)
            #     print(seq_out)
            #     x += 1

            if payload_length > 0:
                stream_counter += 1
                counter += (payload_length - seq_length) + 1
                sys.stdout.write("\r{} streams, {} sequences.".format(
                    stream_counter, counter))
                sys.stdout.flush()

    print "Total streams: {}. Total sequences: {}".format(
        stream_counter, counter)
Example #7
def predict_byte_seq_generator(rnn_model,
                               filename,
                               protocol,
                               port,
                               type,
                               hidden_layers,
                               seq_length,
                               dropout,
                               phase="training",
                               testing_filename=""):
    global prt

    if prt is None:
        if phase == "testing":
            prt = StreamReaderThread(get_pcap_file_fullpath(testing_filename),
                                     protocol, port)
        else:
            prt = StreamReaderThread(get_pcap_file_fullpath(filename),
                                     protocol, port)
        prt.start()
    else:
        prt.reset_read_status()
        prt.delete_read_connections = True

    errors_list = [[], []]
    counter = 0
    print "predict"

    if phase == "testing":
        t1, t2 = load_threshold(type, hidden_layers, seq_length, dropout,
                                protocol, port, filename)
        check_directory(filename, "results")
        fresult = open(
            "results/{}/result-{}-{}-hl{}-seq{}-do{}-{}.csv".format(
                filename, type, protocol + port, hidden_layers, seq_length,
                dropout, testing_filename), "w")
        if not fresult:
            raise Exception("Could not create file")

    # for i in range(0,100):
    while not prt.done or prt.has_ready_message():
        if not prt.has_ready_message():
            prt.wait_for_data()
        else:
            buffered_packet = prt.pop_connection()
            if buffered_packet is None:
                continue

            payload_length = buffered_packet.get_payload_length("server")
            if payload_length <= seq_length:
                continue

            payload = [
                ord(c)
                for c in buffered_packet.get_payload("server")[:MAX_SEQ_LEN]
            ]
            payload_length = len(payload)
            #payload.insert(0, -1) # mark as beginning of payloads
            x_batch = []
            y_batch = []
            for i in range(0, len(payload) - seq_length, 1):
                seq_in = payload[i:i + seq_length]
                seq_out = payload[i + seq_length]
                x = numpy.reshape(seq_in, (1, seq_length))
                #x = numpy.reshape(seq_in, (1, seq_length, 1))
                #x = x / float(255)

                if len(x_batch) == 0:
                    x_batch = x
                    y_batch = [seq_out]
                else:
                    x_batch = numpy.r_[x_batch, x]
                    y_batch = numpy.r_[y_batch, seq_out]

            sys.stdout.write("\rCalculating {} connection. Len: {}".format(
                counter + 1, len(y_batch)))
            sys.stdout.flush()
            if len(y_batch) < 350:
                prediction = rnn_model.predict_on_batch(x_batch)
                predicted_y = numpy.argmax(prediction, axis=1)
            else:
                predicted_y = []
                for i in range(0, len(y_batch), 350):
                    prediction = rnn_model.predict_on_batch(x_batch[i:i + 350])
                    predicted_y = numpy.r_[predicted_y,
                                           (numpy.argmax(prediction, axis=1))]

            binary_anomaly_score = 0
            floating_anomaly_score = 0

            for i in range(0, len(y_batch)):
                if y_batch[i] != predicted_y[i]:
                    binary_anomaly_score += 1
                floating_anomaly_score += (y_batch[i] - predicted_y[i])**2

            binary_prediction_error = float(binary_anomaly_score) / float(
                payload_length)
            floating_prediction_error = floating_anomaly_score / float(
                len(y_batch))

            if phase == "training" or phase == "predicting":
                errors_list[0].append(binary_prediction_error)
                errors_list[1].append(floating_prediction_error)
            elif phase == "testing":
                decision = decide(
                    [binary_prediction_error, floating_prediction_error], t1,
                    t2)
                fresult.write("{},{},{},{},{},{},{},{},{}\n".format(
                    buffered_packet.id, binary_prediction_error, decision[0],
                    decision[1], decision[2], floating_prediction_error,
                    decision[3], decision[4], decision[5]))

            counter += 1
            # for i in range(0,seq_length):
            #     print chr(payload[i]),
            #
            # for i in range(0,len(predicted_y)):
            #     print chr(predicted_y[i]),

            # sys.stdout.write("\rCalculated {} connections.".format(counter))
            # sys.stdout.flush()

    errors_list = numpy.reshape(errors_list, (2, len(errors_list[0])))
    if phase == "training" or phase == "predicting":
        save_mean_stdev(type, protocol, port, hidden_layers, seq_length,
                        dropout, errors_list, filename)
        save_q3_iqr(type, protocol, port, hidden_layers, seq_length, dropout,
                    errors_list, filename)
        save_median_mad(type, protocol, port, hidden_layers, seq_length,
                        dropout, errors_list, filename)
    elif phase == "testing":
        fresult.close()
Example #8
def byte_seq_generator(filename, protocol, port, seq_length, batch_size):
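    # Generator for RNN training: yields (dataX, dataY) batches where each row
    # of dataX is a seq_length window of payload bytes and dataY is the one-hot
    # encoding of the byte that follows the window.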
    global prt
    global root_directory
    global done

    prt = StreamReaderThread(get_pcap_file_fullpath(filename), protocol, port)
    prt.start()
    counter = 0
    done = False

    while not done:
        while not prt.done or prt.has_ready_message():
            if not prt.has_ready_message():
                prt.wait_for_data()
                continue
            else:
                buffered_packet = prt.pop_connection()
                if buffered_packet is None:
                    prt.wait_for_data()
                    continue
                if buffered_packet.get_payload_length("server") > seq_length:
                    payload = [
                        ord(c) for c in buffered_packet.get_payload("server")
                        [:MAX_SEQ_LEN]
                    ]
                    #payload.insert(0, -1)  # mark as beginning of payloads

                    for i in range(0, len(payload) - seq_length, 1):
                        seq_in = payload[i:i + seq_length]
                        seq_out = payload[i + seq_length]
                        #X = numpy.reshape(seq_in, (1, seq_length, 1))
                        X = numpy.reshape(seq_in, (1, seq_length))
                        #X = X / float(255)
                        Y = np_utils.to_categorical(seq_out, num_classes=256)

                        if i == 0 or counter % batch_size == 1:
                            dataX = X
                            dataY = Y
                        else:
                            dataX = numpy.r_["0,2", dataX, X]
                            dataY = numpy.r_["0,2", dataY, Y]

                        counter += 1
                        if dataX.shape[0] % batch_size == 0:
                            #print(counter)
                            #print(dataX.shape, dataY.shape)
                            yield dataX, dataY
                            #pass

                    #yield dataX, dataY

        # print "Total sequences: {}".format(counter)
        prt.reset_read_status()
Example #9
def byte_seq_generator(filename, protocol, port, seq_length, batch_size):
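    # Generator variant that feeds whole payloads: dataX collects the raw byte
    # values of each payload and dataY their per-byte one-hot encodings; the
    # payloads in a batch are padded to a common length before being yielded.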
    global prt
    global root_directory
    global done

    prt = StreamReaderThread(get_pcap_file_fullpath(filename), protocol, port)
    prt.start()
    counter = 0
    longest_length = 0
    done = False

    while not done:
        while not prt.done or prt.has_ready_message():
            if not prt.has_ready_message():
                prt.wait_for_data()
                continue
            else:
                buffered_packet = prt.pop_connection()
                if buffered_packet is None:
                    prt.wait_for_data()
                    continue
                if buffered_packet.get_payload_length("server") > 0:
                    payload = [
                        ord(c) for c in buffered_packet.get_payload("server")
                    ]
                    #payload.insert(0, -1)  # mark as beginning of payloads

                    X = numpy.reshape(payload, (1, len(payload)))
                    Y = numpy.reshape(
                        np_utils.to_categorical(payload, num_classes=256),
                        (1, len(payload), 256))

                    if len(payload) > longest_length:
                        longest_length = len(payload)
                    counter += 1

                    if counter % batch_size == 1:
                        # start a new batch; keep each payload as a 1-D row
                        dataX = [X[0]]
                        dataY = [Y]
                    else:
                        dataX.append(X[0])
                        dataY.append(Y)

                    if counter % batch_size == 0:
                        # pad every payload in the batch to the length of the
                        # longest one before yielding
                        dataX = pad_sequences(dataX, maxlen=longest_length)
                        longest_length = 0
                        yield dataX, dataY

        # print "Total sequences: {}".format(counter)
        prt.reset_read_status()