def main(argv):
    try:
        filename = argv[1]
        protocol = "tcp"
        prt = StreamReaderThread(filename, protocol, "80")
        prt.delete_read_connections = True
        prt.start()
        fcsv = open("csv/test.csv", "w")
        counter = 0
        while not prt.done or prt.has_ready_message():
            if not prt.has_ready_message():
                time.sleep(0.0001)
                continue
            buffered_packets = prt.pop_connection()
            if buffered_packets is not None:
                counter += 1
                # Join the 256 per-byte frequency counts into one CSV field each.
                byte_frequency = ",".join(
                    str(freq)
                    for freq in buffered_packets.get_byte_frequency("server"))
                fcsv.write("{},{},{},{},{}\n".format(
                    buffered_packets.tcp_tuple[0],
                    buffered_packets.tcp_tuple[1],
                    buffered_packets.tcp_tuple[2],
                    buffered_packets.tcp_tuple[3], byte_frequency))
                sys.stdout.write("\r{} flows.".format(counter))
                sys.stdout.flush()
        fcsv.close()
    except IndexError:
        print("Usage: python pcap_to_csv.py <pcap_filename>")

def count_byte_freq(filename, protocol, port):
    global prt
    global conf
    read_conf()
    prt = StreamReaderThread(get_pcap_file_fullpath(filename), protocol, port)
    prt.delete_read_connections = True
    prt.start()
    counter = 0
    missed_counter = 0
    while not prt.done or prt.has_ready_message():
        if not prt.has_ready_message():
            missed_counter += 1
            sys.stdout.write(
                "\r1-{} flows. Missed: {}. {} items in buffer. packets: {}. last ts: {}"
                .format(counter, missed_counter, len(prt.tcp_buffer),
                        prt.packet_counter, prt.last_timestamp))
            sys.stdout.flush()
            prt.wait_for_data()
            continue
        start = time.time()
        buffered_packets = prt.pop_connection()
        end = time.time()
        if buffered_packets is None:
            missed_counter += 1
            sys.stdout.write("\r2-{} flows. Missed: {}. Time: {}".format(
                counter, missed_counter, end - start))
            sys.stdout.flush()
            prt.wait_for_data()
            continue
        elif buffered_packets.get_payload_length("server") > 0:
            counter += 1
            sys.stdout.write("\r3-{} flows. Missed: {}. Time: {}".format(
                counter, missed_counter, end - start))
        else:
            missed_counter += 1
            sys.stdout.write("\r4-{} flows. Missed: {}. Time: {}".format(
                counter, missed_counter, end - start))
        sys.stdout.flush()
    print "Total flows: {}".format(counter)

def predict_byte_freq_generator(autoencoder, filename, protocol, port,
                                hidden_layers, activation_function, dropout,
                                phase="training", testing_filename=""):
    global prt
    global threshold
    if prt is None:
        if phase == "testing":
            prt = StreamReaderThread(get_pcap_file_fullpath(testing_filename),
                                     protocol, port)
            print("testing filename: " + testing_filename)
        else:
            prt = StreamReaderThread(get_pcap_file_fullpath(filename),
                                     protocol, port)
        prt.delete_read_connections = True
        prt.start()
    else:
        prt.reset_read_status()
        prt.delete_read_connections = True
    errors_list = []
    counter = 0
    print "predict"
    if phase == "testing":
        # Load the two detection thresholds computed during training.
        t1, t2 = load_threshold(filename, protocol, port, hidden_layers,
                                activation_function, dropout)
        check_directory(filename, "results")
        open_conn()
        experiment_id = create_experiment(filename, testing_filename,
                                          protocol, port,
                                          ",".join(hidden_layers),
                                          activation_function, dropout)
    i_counter = 0
    while (not prt.done) or prt.has_ready_message():
        if not prt.has_ready_message():
            prt.wait_for_data()
            continue
        buffered_packets = prt.pop_connection()
        if buffered_packets is None:
            continue
        if buffered_packets.get_payload_length("server") == 0:
            continue
        i_counter += 1
        byte_frequency = buffered_packets.get_byte_frequency("server")
        data_x = numpy.reshape(byte_frequency, (1, 256))
        decoded_x = autoencoder.predict(data_x)
        # Reconstruction error: mean squared error over the 256 bins.
        error = numpy.mean((decoded_x - data_x)**2, axis=1)
        if phase == "training" or phase == "predicting":
            errors_list.append(error)
        elif phase == "testing":
            decision = decide(error[0], t1, t2)
            write_results_to_db(experiment_id, buffered_packets, error,
                                decision)
        counter += 1
        sys.stdout.write("\rCalculated {} connections.".format(counter))
        sys.stdout.flush()
    errors_list = numpy.reshape(errors_list, (1, len(errors_list)))
    if phase == "training" or phase == "predicting":
        save_mean_stdev(filename, protocol, port, hidden_layers,
                        activation_function, dropout, errors_list)
        save_q3_iqr(filename, protocol, port, hidden_layers,
                    activation_function, dropout, errors_list)
        save_median_mad(filename, protocol, port, hidden_layers,
                        activation_function, dropout, errors_list)

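# A minimal sketch (an assumption, not this repo's trainer) of the kind of
# autoencoder predict_byte_freq_generator() expects: 256 byte-frequency bins
# in, 256 reconstructed bins out. Layer sizes, activations, and the optimizer
# here are illustrative placeholders only.
from keras.models import Sequential
from keras.layers import Dense, Dropout


def build_autoencoder_sketch(hidden_layers, activation_function, dropout):
    model = Sequential()
    model.add(Dense(int(hidden_layers[0]), activation=activation_function,
                    input_shape=(256,)))
    model.add(Dropout(dropout))
    for units in hidden_layers[1:]:
        model.add(Dense(int(units), activation=activation_function))
        model.add(Dropout(dropout))
    # Reconstruct the input vector; MSE matches the error computed above.
    model.add(Dense(256, activation="sigmoid"))
    model.compile(optimizer="adam", loss="mse")
    return model
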
def byte_freq_generator(filename, protocol, port, batch_size):
    # This is a generator: because the body contains the yield keyword,
    # calling it returns an iterator of batches instead of running eagerly.
    global prt
    global conf
    global done
    prt = StreamReaderThread(get_pcap_file_fullpath(filename), protocol, port)
    prt.start()  # start the reader thread
    counter = 0
    done = False
    while not done:
        while not prt.done or prt.has_ready_message():
            if not prt.has_ready_message():
                # No message ready yet; block until the reader has data.
                prt.wait_for_data()
                continue
            buffered_packets = prt.pop_connection()
            if buffered_packets is None:
                time.sleep(0.0001)
                continue
            if buffered_packets.get_payload_length("server") > 0:
                byte_frequency = buffered_packets.get_byte_frequency("server")
                X = numpy.reshape(byte_frequency, (1, 256))
                if counter == 0 or counter % batch_size == 1:
                    dataX = X
                else:
                    # Stack the matrices vertically (column counts must
                    # match), similar to pandas concat().
                    dataX = numpy.r_["0,2", dataX, X]
                counter += 1
                if counter % batch_size == 0:
                    yield dataX, dataX
        # Flush any final partial batch before re-reading the capture.
        if counter % batch_size != 0 and dataX.shape[0] > 0:
            yield dataX, dataX
        prt.reset_read_status()

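# A minimal usage sketch, assuming a compiled Keras model: because
# byte_freq_generator() yields (X, X) pairs, it can feed an autoencoder via
# fit_generator() directly. The step and epoch counts are illustrative
# placeholders, not values from this project.
def train_autoencoder_sketch(autoencoder, filename, protocol, port):
    generator = byte_freq_generator(filename, protocol, port, batch_size=32)
    autoencoder.fit_generator(generator, steps_per_epoch=100, epochs=10)
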
def count_byte_seq_generator(filename, protocol, port, seq_length):
    global prt
    global root_directory
    prt = StreamReaderThread(get_pcap_file_fullpath(filename), protocol, port)
    prt.delete_read_connections = True
    prt.start()
    counter = 0
    stream_counter = 0
    while not prt.done or prt.has_ready_message():
        if not prt.has_ready_message():
            prt.wait_for_data()
            continue
        buffered_packet = prt.pop_connection()
        if buffered_packet is None:
            prt.wait_for_data()
            continue
        payload_length = buffered_packet.get_payload_length("server")
        if payload_length > MAX_SEQ_LEN:
            payload_length = MAX_SEQ_LEN
        if payload_length > 0:
            stream_counter += 1
            # Each usable stream contributes (payload_length - seq_length) + 1
            # sliding-window sequences to the total.
            counter += (payload_length - seq_length) + 1
            sys.stdout.write("\r{} streams, {} sequences.".format(
                stream_counter, counter))
            sys.stdout.flush()
    print "Total streams: {}. Total sequences: {}".format(
        stream_counter, counter)

def predict_byte_seq_generator(rnn_model, filename, protocol, port, type,
                               hidden_layers, seq_length, dropout,
                               phase="training", testing_filename=""):
    global prt
    if prt is None:
        if phase == "testing":
            prt = StreamReaderThread(get_pcap_file_fullpath(testing_filename),
                                     protocol, port)
        else:
            prt = StreamReaderThread(get_pcap_file_fullpath(filename),
                                     protocol, port)
        prt.start()
    else:
        prt.reset_read_status()
    prt.delete_read_connections = True
    errors_list = [[], []]
    counter = 0
    print "predict"
    if phase == "testing":
        # Load the two detection thresholds computed during training.
        t1, t2 = load_threshold(type, hidden_layers, seq_length, dropout,
                                protocol, port, filename)
        check_directory(filename, "results")
        fresult = open(
            "results/{}/result-{}-{}-hl{}-seq{}-do{}-{}.csv".format(
                filename, type, protocol + port, hidden_layers, seq_length,
                dropout, testing_filename), "w")
        if not fresult:
            raise Exception("Could not create file")
    while not prt.done or prt.has_ready_message():
        if not prt.has_ready_message():
            prt.wait_for_data()
            continue
        buffered_packet = prt.pop_connection()
        if buffered_packet is None:
            continue
        payload_length = buffered_packet.get_payload_length("server")
        if payload_length <= seq_length:
            continue
        payload = [
            ord(c)
            for c in buffered_packet.get_payload("server")[:MAX_SEQ_LEN]
        ]
        payload_length = len(payload)
        x_batch = []
        y_batch = []
        # Slide a window of seq_length bytes over the payload; the model
        # predicts the byte that follows each window.
        for i in range(0, len(payload) - seq_length, 1):
            seq_in = payload[i:i + seq_length]
            seq_out = payload[i + seq_length]
            x = numpy.reshape(seq_in, (1, seq_length))
            if len(x_batch) == 0:
                x_batch = x
                y_batch = [seq_out]
            else:
                x_batch = numpy.r_[x_batch, x]
                y_batch = numpy.r_[y_batch, seq_out]
        sys.stdout.write("\rCalculating {} connection. Len: {}".format(
            counter + 1, len(y_batch)))
        sys.stdout.flush()
        if len(y_batch) < 350:
            prediction = rnn_model.predict_on_batch(x_batch)
            predicted_y = numpy.argmax(prediction, axis=1)
        else:
            # Predict in chunks of 350 sequences to bound memory use.
            predicted_y = []
            for i in range(0, len(y_batch), 350):
                prediction = rnn_model.predict_on_batch(x_batch[i:i + 350])
                predicted_y = numpy.r_[predicted_y,
                                       numpy.argmax(prediction, axis=1)]
        binary_anomaly_score = 0
        floating_anomaly_score = 0
        for i in range(0, len(y_batch)):
            if y_batch[i] != predicted_y[i]:
                binary_anomaly_score += 1
                floating_anomaly_score += (y_batch[i] - predicted_y[i])**2
        binary_prediction_error = float(binary_anomaly_score) / float(
            payload_length)
        floating_prediction_error = floating_anomaly_score / float(
            len(y_batch))
        if phase == "training" or phase == "predicting":
            errors_list[0].append(binary_prediction_error)
            errors_list[1].append(floating_prediction_error)
        elif phase == "testing":
            decision = decide(
                [binary_prediction_error, floating_prediction_error], t1, t2)
            fresult.write("{},{},{},{},{},{},{},{},{}\n".format(
                buffered_packet.id, binary_prediction_error, decision[0],
                decision[1], decision[2], floating_prediction_error,
                decision[3], decision[4], decision[5]))
        counter += 1
    errors_list = numpy.reshape(errors_list, (2, len(errors_list[0])))
    if phase == "training" or phase == "predicting":
        save_mean_stdev(type, protocol, port, hidden_layers, seq_length,
                        dropout, errors_list, filename)
        save_q3_iqr(type, protocol, port, hidden_layers, seq_length, dropout,
                    errors_list, filename)
        save_median_mad(type, protocol, port, hidden_layers, seq_length,
                        dropout, errors_list, filename)
    elif phase == "testing":
        fresult.close()

def byte_seq_generator(filename, protocol, port, seq_length, batch_size):
    global prt
    global root_directory
    global done
    prt = StreamReaderThread(get_pcap_file_fullpath(filename), protocol, port)
    prt.start()
    counter = 0
    done = False
    while not done:
        while not prt.done or prt.has_ready_message():
            if not prt.has_ready_message():
                prt.wait_for_data()
                continue
            buffered_packet = prt.pop_connection()
            if buffered_packet is None:
                prt.wait_for_data()
                continue
            if buffered_packet.get_payload_length("server") > seq_length:
                payload = [
                    ord(c)
                    for c in buffered_packet.get_payload("server")[:MAX_SEQ_LEN]
                ]
                # payload.insert(0, -1)  # mark as beginning of payloads
                for i in range(0, len(payload) - seq_length, 1):
                    seq_in = payload[i:i + seq_length]
                    seq_out = payload[i + seq_length]
                    X = numpy.reshape(seq_in, (1, seq_length))
                    # One-hot encode the next byte as the training target.
                    Y = np_utils.to_categorical(seq_out, num_classes=256)
                    if i == 0 or counter % batch_size == 1:
                        dataX = X
                        dataY = Y
                    else:
                        dataX = numpy.r_["0,2", dataX, X]
                        dataY = numpy.r_["0,2", dataY, Y]
                    counter += 1
                    if dataX.shape[0] % batch_size == 0:
                        yield dataX, dataY
        prt.reset_read_status()

def byte_seq_generator(filename, protocol, port, seq_length, batch_size):
    # Note: this second definition shadows the sliding-window version above.
    # It batches whole payloads, padding each batch to its longest payload.
    global prt
    global root_directory
    global done
    prt = StreamReaderThread(get_pcap_file_fullpath(filename), protocol, port)
    prt.start()
    counter = 0
    dataX = []
    done = False
    while not done:
        while not prt.done or prt.has_ready_message():
            if not prt.has_ready_message():
                prt.wait_for_data()
                continue
            buffered_packet = prt.pop_connection()
            if buffered_packet is None:
                prt.wait_for_data()
                continue
            if buffered_packet.get_payload_length("server") > 0:
                payload = [
                    ord(c) for c in buffered_packet.get_payload("server")
                ]
                # Collect raw byte sequences; the one-hot targets are built
                # after padding so X and Y stay aligned.
                dataX.append(payload)
                counter += 1
                if counter % batch_size == 0:
                    # Pad every payload in the batch to the longest one.
                    X = pad_sequences(dataX)
                    Y = np_utils.to_categorical(X, num_classes=256).reshape(
                        X.shape[0], X.shape[1], 256)
                    dataX = []
                    yield X, Y
        prt.reset_read_status()
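
# A minimal sketch (an assumption, not this repo's trainer) of an RNN that
# fits the first byte_seq_generator() above: integer windows of seq_length
# bytes in, a 256-way softmax over the next byte out. The embedding width
# and LSTM size are illustrative placeholders.
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense


def build_byte_rnn_sketch(seq_length, hidden_units=128, dropout=0.2):
    model = Sequential()
    model.add(Embedding(input_dim=256, output_dim=32,
                        input_length=seq_length))
    model.add(LSTM(hidden_units, dropout=dropout))
    model.add(Dense(256, activation="softmax"))
    model.compile(optimizer="adam", loss="categorical_crossentropy")
    return model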