def size_count(packets):
    outgoing = []
    incoming = []
    for p in packets:
        if p[2] == -1:
            incoming.append(p)
        else:
            outgoing.append(p)

    try:
        out_size_list = su.csv_numpy('stats/adapt_out_distribution_size.csv')
    except IOError:
        with open('stats/adapt_out_distribution_size.csv', 'a') as build:
            writer = csv.writer(build)
            writer.writerow(['size', 'count'])
        out_size_list = su.csv_numpy('stats/adapt_out_distribution_size.csv')
    try:
        in_size_list = su.csv_numpy('stats/adapt_in_distribution_size.csv')
    except IOError:
        with open('stats/adapt_in_distribution_size.csv', 'a') as build:
            writer = csv.writer(build)
            writer.writerow(['size', 'count'])
        in_size_list = su.csv_numpy('stats/adapt_in_distribution_size.csv')

    out_exist = False
    for p in outgoing:
        for s in out_size_list:
            if p[1] == s[0]:
                s[1] = s[1] + 1
                out_exist = True
                break
        if not out_exist:
            new_out_size = [p[1], 1]
            out_size_list.append(new_out_size)
        # reset the flag for the next packet
        out_exist = False
    db_df = pd.DataFrame(out_size_list, columns=['size', 'count'])
    db_df.to_csv('stats/adapt_out_distribution_size.csv', index=False)

    in_exist = False
    for p in incoming:
        for s in in_size_list:
            if p[1] == s[0]:
                s[1] = s[1] + 1
                in_exist = True
                break
        if not in_exist:
            new_in_size = [p[1], 1]
            in_size_list.append(new_in_size)
        # reset the flag for the next packet
        in_exist = False
    db_df = pd.DataFrame(in_size_list, columns=['size', 'count'])
    db_df.to_csv('stats/adapt_in_distribution_size.csv', index=False)
def main(opts):
    # duplex_path = "half_duplex/Announce_Happy_Valentines_Day_29__0218._HD.csv"
    # out_interval = 0.5
    # in_interval = 1
    # size = 1500
    duplex_path = opts.hdPath
    size = float(opts.size)
    out_interval = float(opts.oInterval)
    in_interval = float(opts.iInterval)
    pf = Path(duplex_path)
    trace_name = pf.name[0:-7]
    duplex_list = su.csv_numpy(duplex_path)
    index = divide_list(duplex_list)
    outgoing_list = duplex_list[0:index + 1]
    # same_outgoing = outgoing_process_buflo_method(outgoing_list, 1500, out_interval, size)
    same_outgoing = outgoing_process(trace_name, out_interval)
    outgoing_end = same_outgoing[-1][0]
    incoming_list = duplex_list[index + 1:len(duplex_list)]
    logk_incoming = incoming_process(trace_name, incoming_list, in_interval,
                                     size, outgoing_end)
    logk_list = same_outgoing + logk_incoming
    logk_df = pd.DataFrame(
        logk_list, columns=['time', 'size', 'direction', 'overhead', 'type'])
    logk_df.to_csv("logk_list/" + trace_name + '_logk_.csv', index=False)
    print('logk of ' + trace_name + " is finished")
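# A minimal entry-point sketch for the log-k driver above. The flag names
# (--hdPath, --size, --oInterval, --iInterval) are assumptions chosen only to
# match the attributes read from `opts`; the defaults mirror the commented-out
# values in main(), and the original argument parser is not shown here.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description='Apply log-k padding to one half-duplex trace CSV.')
    parser.add_argument('--hdPath', required=True,
                        help='path to the *_HD.csv half-duplex trace')
    parser.add_argument('--size', default='1500',
                        help='padded packet size in bytes')
    parser.add_argument('--oInterval', default='0.5',
                        help='fixed outgoing inter-packet interval (seconds)')
    parser.add_argument('--iInterval', default='1',
                        help='fixed incoming inter-packet interval (seconds)')
    main(parser.parse_args())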
def outgoing_process(trace_name, interval):
    outgoing_stats = su.csv_numpy("stats/filter_stats.csv")
    outgoing_stats.sort(key=su.sort_by_fourth)
    out_num = outgoing_stats[-1][3]
    size = 1500
    original_num = 0
    remainder = 0
    time = 0
    for p in outgoing_stats:
        if p[0] == trace_name:
            original_num = p[3]
            remainder = p[4]
            break
    out_list = []
    for i in range(int(original_num) + 1):
        time = i * interval
        packet = [time, size, 1, 0, 'original']
        if i == original_num:
            packet = [time, size, 1, size - remainder, 'padded']
        out_list.append(packet)
    for i in range(int(out_num - original_num - 1)):
        # keep the same fixed outgoing interval for the dummy packets
        time = time + interval
        packet = [time, size, 1, size, 'dummy']
        out_list.append(packet)
    return out_list
def main(opts):
    csv_path = opts.csvPath
    packets = su.csv_numpy(csv_path)
    pf = Path(csv_path)
    trace_name = pf.name[0:-4]
    size_count(packets)
    interval_count(packets)
    print(csv_path + ' is finished')
def incoming_process(trace_name, incoming_list, interval, size, start_time):
    distribution = su.csv_numpy("stats/logk_distribution.csv")
    stats = su.csv_numpy("stats/filter_stats.csv")
    log_in_num = 0
    remainder = 0
    duplex_in_num = 0
    time = 0
    for p in stats:
        if p[0] == trace_name:
            duplex_in_num = p[5]
            remainder = p[6]
            break
    for p in distribution:
        if su.same_name(p[0], trace_name):
            if p[-1] == '':
                added_num = 0
            else:
                added_num = int(float(p[-1]))
            log_in_num = added_num + p[-3]
            break
    # original_end = packets[-1][1]
    incoming = []
    added_num = log_in_num - duplex_in_num - 1
    index = 0
    for i in range(int(duplex_in_num) + 1):
        time = start_time + interval * (i + 1)
        # index = start_index + i + 1
        direction = -1
        original_packet = [time, size, direction, 0, 'original']
        if i == duplex_in_num:
            original_packet = [time, size, direction, size - remainder, 'padded']
        incoming.append(original_packet)
    for i in range(int(added_num)):
        time = time + interval
        # index = index + i + 1
        direction = -1
        dummy_packet = [time, size, direction, size, 'dummy']
        incoming.append(dummy_packet)
    return incoming
def logk_process(filter_in_stats):
    filter_in_stats.sort(key=su.sort_by_name)
    same_traces = []
    for i, p in enumerate(filter_in_stats):
        if i == 0:
            same_traces.append(p)
            continue
        if i == len(filter_in_stats) - 1:
            same_traces.append(p)
            same_traces.sort(key=su.sort_by_third)
            with open("stats/logk_analysis.csv", 'a') as _in:
                writer = csv.writer(_in)
                writer.writerow(same_traces[-1])
            # traces_df = pd.DataFrame(same_traces)
            # traces_df.to_csv('stats' + filter_in_stats[i - 1][0] + '.csv')
            continue
        if su.same_name(p[0], filter_in_stats[i - 1][0]):
            same_traces.append(p)
        else:
            same_traces.sort(key=su.sort_by_third)
            with open("stats/logk_analysis.csv", 'a') as _in:
                writer = csv.writer(_in)
                writer.writerow(same_traces[-1])
            same_traces = []
            same_traces.append(p)

    filter_in_stats = su.csv_numpy("stats/logk_analysis.csv")
    filter_in_stats.sort(key=su.sort_by_third)
    l = len(filter_in_stats)
    k = int(np.ceil(l / 2))
    start = 0
    logk_list = []
    while k >= 1:
        if start + k <= len(filter_in_stats):
            print("index({},{})".format(start, start + k - 1))
            if start == len(filter_in_stats) - 2:
                anonymity_list = filter_in_stats[start:(start + k + 1)]
            else:
                anonymity_list = filter_in_stats[start:(start + k)]
            logk = add_dummy(anonymity_list)
            start = start + k
            logk_list = logk_list + logk
        else:
            break
        k = int(np.ceil((l - start) / 2))
    print(len(logk_list))
    echo_df2 = pd.DataFrame(logk_list, columns=[
        'name', 'original_in_num', 'duplex_num', 'remainder', 'padded_num'
    ])
    echo_df2.to_csv("stats/logk_distribution.csv", index=False)
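# A small illustration of the grouping schedule used by logk_process above:
# the first anonymity set covers the top ceil(l/2) traces, the next covers
# half of what remains, and so on. This helper only reproduces the index
# arithmetic of the while-loop (including the widened final group when
# start == l - 2); it is illustrative and touches no CSV files.
def logk_groups(l):
    groups = []
    start = 0
    k = int(np.ceil(l / 2))
    while k >= 1:
        if start + k > l:
            break
        if start == l - 2:
            groups.append((start, start + k))  # widened final group
        else:
            groups.append((start, start + k - 1))
        start = start + k
        k = int(np.ceil((l - start) / 2))
    return groups

# e.g. logk_groups(7) -> [(0, 3), (4, 5), (6, 6)]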
def main(opts):
    # csv_path = 'csv/gamma/1/Announce_Happy_Valentines_Day_??_Google_0_.csv'
    csv_path = opts.csvPath
    packets = su.csv_numpy(csv_path)
    pf = Path(csv_path)
    trace_name = pf.name[0:-4]
    size_count(packets)
    interval_count(packets)
    print(csv_path + ' is finished')
def main(opts):
    # csv_path = 'csv/gamma/1/Announce_Happy_Valentines_Day_??_Google_0_.csv'
    # folder = 'test'
    # eps = 0.005
    csv_path = opts.csvPath
    folder = opts.folder
    eps = float(opts.eps)
    packets = su.csv_numpy(csv_path)
    pf = Path(csv_path)
    trace_name = pf.name[0:-4]
    distribution_generator(packets, trace_name, folder, eps)
    print(csv_path + ' is finished')
def main(opts):
    # csv_path = "/home/lhp/PycharmProjects/pcap_csv/csv/Announce_Happy_Valentines_Day_1__0218.csv"
    # half_duplex_path = "/home/lhp/PycharmProjects/pcap_csv/half_duplex/"
    csv_path = opts.csvPath
    half_duplex_path = opts.duplexPath
    pf = Path(csv_path)
    trace_name = pf.name[0:-3]
    packet_list = su.csv_numpy(csv_path)
    duplex_list = half_duplex(packet_list, trace_name)
    duplex_df = pd.DataFrame(duplex_list, columns=['time', 'size', 'direction'])
    duplex_df.to_csv(half_duplex_path + trace_name + "_HD.csv", index=False)
    print("Half_duplex of " + trace_name + " is finished")
def main():
    # incoming_stats_path = "/home/lhp/PycharmProjects/pcap_csv/stats/incoming_number.csv"
    # outgoing_stats_path = "/home/lhp/PycharmProjects/pcap_csv/stats/outgoing_number.csv"
    #
    # in_stats = csv_numpy(incoming_stats_path)
    # out_stats = csv_numpy(outgoing_stats_path)
    # in_stats.sort(key=sort_by_second)
    stats_path = "stats/stats.csv"
    stats = su.csv_numpy(stats_path)
    for p in stats:
        num = int(p[3]) + int(p[5])
        p.append(num)
    stats_out = outgoing_process(stats)
    incoming_process(stats_out)
def main(opts):
    csv_path = opts.csvPath
    folder = opts.folder
    eps = float(opts.eps)
    dst = '/home/lhp/PycharmProjects/2019_spring_data/optionB/' + str(
        eps) + '/' + folder
    if not os.path.isdir(dst):
        os.makedirs(dst)
    packets = su.csv_numpy(csv_path)
    pf = Path(csv_path)
    trace_name = pf.name[0:-4]
    distribution_generator(packets, trace_name, folder, eps)
    print(csv_path + ' is finished')
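# A minimal entry-point sketch for the optionB driver above. The flag names
# (--csvPath, --folder, --eps) are assumptions matching the attributes read
# from `opts`; the eps default follows the commented-out value used elsewhere
# in these scripts, and the original parser setup is not part of this section.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description='Generate obfuscated (lap) traces for one input CSV.')
    parser.add_argument('--csvPath', required=True,
                        help='path to the input trace CSV')
    parser.add_argument('--folder', required=True,
                        help='output sub-folder name')
    parser.add_argument('--eps', default='0.005',
                        help='epsilon parameter passed to distribution_generator')
    main(parser.parse_args())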
def main():
    # incoming_stats_path = "/home/lhp/PycharmProjects/pcap_csv/stats/incoming_number.csv"
    # outgoing_stats_path = "/home/lhp/PycharmProjects/pcap_csv/stats/outgoing_number.csv"
    #
    # in_stats = csv_numpy(incoming_stats_path)
    # out_stats = csv_numpy(outgoing_stats_path)
    # in_stats.sort(key=sort_by_second)
    stats_path = "stats/stats.csv"
    stats = su.csv_numpy(stats_path)
    for p in stats:
        num = int(p[3]) + int(p[5])
        p.append(num)
    # echo_df2 = pd.DataFrame(stats, columns=['name', 'original_out_num', 'original_in_num',
    #                                         'duplex_out_num', 'duplex_in_num', 'max_interval',
    #                                         'min_interval', 'ave_interval', 'original_total_num'])
    # echo_df2.to_csv("stats/stats1.csv", index=False)
    stats_out = outgoing_process(stats)
    incoming_process(stats_out)
def distribution_generator(packets, trace_name, folder, eps):
    in_size_path = 'stats/distribution_gamma/adapt_in_distribution_size.csv'
    in_interval_path = 'stats/distribution_gamma/adapt_in_distribution_interval.csv'
    in_size_list = su.csv_numpy(in_size_path)
    in_interval_list = su.csv_numpy(in_interval_path)
    in_size_list.sort(key=su.sort_by_second, reverse=True)
    in_interval_list.sort(key=su.sort_by_second, reverse=True)
    in_size_list = in_size_list[0:100]
    calculate_ratio(in_size_list)
    calculate_ratio(in_interval_list)

    # NOTE: out_size_list/out_interval_list are needed by the fill_gap_lap()
    # call below but were never loaded in the original text; the paths and the
    # treatment here are assumed to mirror the incoming distributions above.
    out_size_path = 'stats/distribution_gamma/adapt_out_distribution_size.csv'
    out_interval_path = 'stats/distribution_gamma/adapt_out_distribution_interval.csv'
    out_size_list = su.csv_numpy(out_size_path)
    out_interval_list = su.csv_numpy(out_interval_path)
    out_size_list.sort(key=su.sort_by_second, reverse=True)
    out_interval_list.sort(key=su.sort_by_second, reverse=True)
    out_size_list = out_size_list[0:100]
    calculate_ratio(out_size_list)
    calculate_ratio(out_interval_list)

    ori_end = packets[-1][0]
    ori_size = 0
    outgoing = []
    incoming = []
    for p in packets:
        if p[2] == -1:
            incoming.append(p)
        else:
            outgoing.append([p[0], 0, p[2]])
    for p in incoming:
        ori_size = ori_size + p[1]

    (outgoing_ap, outgoing_lap, out_ap_overhead, out_lap_overhead,
     out_ap_et_overhead, out_lap_et_overhead, out_unfinished, out_proc_q,
     positive_1) = fill_gap_lap(outgoing, out_size_list, out_interval_list, eps)
    (incoming_ap, incoming_lap, in_ap_overhead, in_lap_overhead,
     in_ap_et_overhead, in_lap_et_overhead, in_unfinished, in_proc_q,
     positive_2) = fill_gap_lap(incoming, in_size_list, in_interval_list, eps)

    outgoing_lap.pop(0)
    incoming_lap.pop(0)
    outgoing_ap.pop(0)
    incoming_ap.pop(0)

    buffer_list = list(out_proc_q.queue) + list(in_proc_q.queue)
    buffer_list.sort(key=su.sort_by_name)
    # buffer_list.append([positive_1 + positive_2])
    buffer_list.append([len(incoming_lap) + len(outgoing_lap)])
    # only the incoming queue is kept for the buffer report
    buffer_list = list(in_proc_q.queue)
    try:
        buffer_df = pd.DataFrame(buffer_list, columns=[
            'buffered_time', 'buffered_index', 'size', 'cleaned_time',
            'cleaned_index', 'dummy_n', 'real_n'
        ])
        buffer_df.to_csv(
            '/home/lhp/PycharmProjects/2019_spring_data/optionB/' + str(eps) +
            '/' + folder + '/' + trace_name + 'buffer.csv', index=False)
    except AssertionError:
        print('no proc queue!!!')

    ap_list = outgoing_ap + incoming_ap
    ap_list.sort(key=su.sort_by_name)

    real_ap_overhead = out_ap_overhead + in_ap_overhead
    real_lap_overhead = out_lap_overhead + in_lap_overhead
    et_ap_overhead = out_ap_et_overhead + in_ap_et_overhead
    et_lap_overhead = out_lap_et_overhead + in_lap_et_overhead
    ap_overall_overhead = real_ap_overhead + et_ap_overhead
    lap_overall_overhead = real_lap_overhead + et_lap_overhead
    unfinished = out_unfinished or in_unfinished

    # the combined overheads above are overwritten: only the incoming
    # direction is reported
    real_ap_overhead = in_ap_overhead
    real_lap_overhead = in_lap_overhead
    et_ap_overhead = in_ap_et_overhead
    et_lap_overhead = in_lap_et_overhead
    ap_overall_overhead = real_ap_overhead + et_ap_overhead
    lap_overall_overhead = real_lap_overhead + et_lap_overhead
    unfinished = in_unfinished

    # ap_df = pd.DataFrame(ap_list, columns=['time', 'size', 'direction', 'type'])
    # ap_df.to_csv('obf_data/adapt_list/' + folder + '/' + trace_name + '_ap.csv', index=False)

    lap_list = incoming_lap + outgoing
    lap_list.sort(key=su.sort_by_name)
    info_stat(eps, trace_name, ori_size, real_ap_overhead, et_ap_overhead,
              ap_overall_overhead, real_lap_overhead, et_lap_overhead,
              lap_overall_overhead, ori_end, lap_list[-1][0] - ori_end,
              unfinished)
    lap_df = pd.DataFrame(lap_list, columns=['time', 'size', 'direction', 'type'])
    if not os.path.isdir('obf_data/lap_list/' + str(eps) + '/' + folder):
        os.makedirs('obf_data/lap_list/' + str(eps) + '/' + folder)
    lap_df.to_csv('obf_data/lap_list/' + str(eps) + '/' + folder + '/' +
                  trace_name + 'lap.csv', index=False)
def interval_count(packets):
    init_out_list = [[0.00001, 0], [0.00005, 0], [0.0001, 0], [0.0005, 0],
                     [0.001, 0], [0.003, 0], [0.005, 0], [0.01, 0],
                     [0.012, 0], [0.014, 0], [0.016, 0], [0.018, 0],
                     [0.02, 0], [0.025, 0], [0.03, 0], [0.05, 0], [0.1, 0],
                     [0.5, 0], [1, 0], [100000, 0]]
    init_in_list = [[0.00001, 0], [0.0001, 0], [0.00013, 0], [0.00015, 0],
                    [0.00017, 0], [0.0002, 0], [0.00025, 0], [0.0003, 0],
                    [0.0005, 0], [0.001, 0], [0.005, 0], [0.01, 0], [0.03, 0],
                    [0.05, 0], [0.07, 0], [0.1, 0], [0.5, 0], [1.0, 0],
                    [2.0, 0], [100000, 0]]
    outgoing = []
    incoming = []
    for p in packets:
        if p[2] == -1:
            incoming.append(p)
        else:
            outgoing.append(p)
    try:
        out_interval_list = su.csv_numpy(
            'stats/adapt_out_distribution_interval.csv')
    except IOError:
        with open('stats/adapt_out_distribution_interval.csv', 'a') as build:
            writer = csv.writer(build)
            writer.writerow(['interval', 'count'])
            for p in init_out_list:
                writer.writerow(p)
        out_interval_list = su.csv_numpy(
            'stats/adapt_out_distribution_interval.csv')
    try:
        in_interval_list = su.csv_numpy(
            'stats/adapt_in_distribution_interval.csv')
    except IOError:
        with open('stats/adapt_in_distribution_interval.csv', 'a') as build:
            writer = csv.writer(build)
            writer.writerow(['interval', 'count'])
            for p in init_in_list:
                writer.writerow(p)
        in_interval_list = su.csv_numpy(
            'stats/adapt_in_distribution_interval.csv')

    for i, p in enumerate(outgoing):
        if i == 0:
            continue
        out_interval = p[0] - outgoing[i - 1][0]
        for k in out_interval_list:
            if out_interval <= k[0]:
                k[1] += 1
                break
    db_df = pd.DataFrame(out_interval_list, columns=['interval', 'count'])
    db_df.to_csv('stats/adapt_out_distribution_interval.csv', index=False)

    for i, p in enumerate(incoming):
        if i == 0:
            continue
        in_interval = p[0] - incoming[i - 1][0]
        for k in in_interval_list:
            if in_interval <= k[0]:
                k[1] += 1
                break
    db_df = pd.DataFrame(in_interval_list, columns=['interval', 'count'])
    db_df.to_csv('stats/adapt_in_distribution_interval.csv', index=False)
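# A minimal sketch of the bucketing rule used by interval_count above: each
# inter-packet gap increments the first bucket whose upper bound is >= the
# gap. The helper and the example values below are illustrative only; they
# are not part of the original pipeline.
def bucket_interval(gap, bucket_list):
    """Increment the count of the first bucket whose bound covers `gap`."""
    for bound_count in bucket_list:
        if gap <= bound_count[0]:
            bound_count[1] += 1
            return bound_count[0]
    return None

# example (hypothetical buckets):
# buckets = [[0.001, 0], [0.005, 0], [0.01, 0], [100000, 0]]
# bucket_interval(0.004, buckets)  # -> 0.005; the 0.005 bucket count becomes 1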
def main(opts):
    # csv_path = 'csv/April/Announce_Happy_Valentines_Day_??_Google_0_.csv'
    #
    # csv_path = opts.csvPath
    # packets = su.csv_numpy(csv_path)
    # feature_chi(packets)
    features_title = [
        'name', 'total_time', 'total_num', 'total_bytes', 'burst_num',
        'in_burst_num', 'in_burst_ratio', 'out_burst_num', 'out_burst_ratio',
        'in_burst_bandwidth', 'out_burst_bandwidth', 'max_in_bandwidth',
        'max_out_bandwidth', 'incoming_num', 'in_num_ratio', 'outgoing_num',
        'out_num_ratio', 'incoming_bytes', 'in_bytes_ratio', 'outgoing_bytes',
        'out_bytes_ratio'
    ]
    path = 'csv/April'
    with open('stats/feature2.csv', 'a') as title:
        writer = csv.writer(title)
        writer.writerow(features_title)
    m = 0
    files = os.listdir(path)
    query_dict = {}
    for f in files:
        m += 1
        src = path + '/' + f
        packets = su.csv_numpy(src)
        trace_name = su.extract_name(f)
        features_cal(trace_name, packets)
        # (total_time, total_num, total_bytes, burst_num,
        #  in_burst_num, in_burst_ratio,
        #  out_burst_num, out_burst_ratio,
        #  in_burst_bandwidth, out_burst_bandwidth,
        #  max_in_bandwidth, max_out_bandwidth,
        #  incoming_num, in_num_ratio,
        #  outgoing_num, out_num_ratio,
        #  incoming_bytes, in_bytes_ratio,
        #  outgoing_bytes, out_bytes_ratio) = features_cal(trace_name, packets)
        # if trace_name not in query_dict:
        #     q = query(f, total_time, total_num, total_bytes, burst_num,
        #               in_burst_num, in_burst_ratio,
        #               out_burst_num, out_burst_ratio,
        #               in_burst_bandwidth, out_burst_bandwidth,
        #               max_in_bandwidth, max_out_bandwidth,
        #               incoming_num, in_num_ratio,
        #               outgoing_num, out_num_ratio,
        #               incoming_bytes, in_bytes_ratio,
        #               outgoing_bytes, out_bytes_ratio)
        #     new_query = {trace_name: q}
        #     query_dict.update(new_query)
        # else:
        #     query_dict[trace_name].total_time = (total_time + query_dict[trace_name].total_time) / 2
        #     query_dict[trace_name].total_num = (total_num + query_dict[trace_name].total_num) / 2
        #     query_dict[trace_name].total_bytes = (total_bytes + query_dict[trace_name].total_bytes) / 2
        #     query_dict[trace_name].burst_num = (burst_num + query_dict[trace_name].burst_num) / 2
        #     query_dict[trace_name].in_burst_num = (in_burst_num + query_dict[trace_name].in_burst_num) / 2
        #     query_dict[trace_name].in_burst_ratio = (in_burst_ratio + query_dict[trace_name].in_burst_ratio) / 2
        #     query_dict[trace_name].out_burst_num = (out_burst_num + query_dict[trace_name].out_burst_num) / 2
        #     query_dict[trace_name].out_burst_ratio = (out_burst_ratio + query_dict[trace_name].out_burst_ratio) / 2
        #     query_dict[trace_name].out_burst_bandwidth = (out_burst_bandwidth + query_dict[trace_name].out_burst_bandwidth) / 2
        #     query_dict[trace_name].in_burst_bandwidth = (in_burst_bandwidth + query_dict[trace_name].in_burst_bandwidth) / 2
        #     query_dict[trace_name].max_in_bandwidth = (max_in_bandwidth + query_dict[trace_name].max_in_bandwidth) / 2
        #     query_dict[trace_name].max_out_bandwidth = (max_out_bandwidth + query_dict[trace_name].max_out_bandwidth) / 2
        #     query_dict[trace_name].incoming_num = (incoming_num + query_dict[trace_name].incoming_num) / 2
        #     query_dict[trace_name].in_num_ratio = (in_num_ratio + query_dict[trace_name].in_num_ratio) / 2
        #     query_dict[trace_name].outgoing_num = (outgoing_num + query_dict[trace_name].outgoing_num) / 2
        #     query_dict[trace_name].out_num_ratio = (out_num_ratio + query_dict[trace_name].out_num_ratio) / 2
        #     query_dict[trace_name].incoming_bytes = (incoming_bytes + query_dict[trace_name].incoming_bytes) / 2
        #     query_dict[trace_name].in_bytes_ratio = (in_bytes_ratio + query_dict[trace_name].in_bytes_ratio) / 2
        #     query_dict[trace_name].out_bytes_ratio = (out_bytes_ratio + query_dict[trace_name].out_bytes_ratio) / 2
        #     query_dict[trace_name].outgoing_bytes = (outgoing_bytes + query_dict[trace_name].outgoing_bytes) / 2
        if m % 100 == 0:
            print(m)