def analysis_report_pcap(pcap_file: str,
                         total_flows_from_trace: int = 0) -> "str | None":
    """Analyze INT local reports in a pcap and plot the IRG histogram/CDF.

    Walks every packet in ``pcap_file``, keeps only packets carrying an
    ``IntL45ReportFixed`` + ``IntL45LocalReport`` header with an inner
    IPv4 TCP/UDP packet, and computes per-flow inter-report gaps (IRGs)
    from the report egress timestamps (nanoseconds -> seconds).

    Args:
        pcap_file: Path to the pcap file with INT reports.
        total_flows_from_trace: If non-zero, used as the denominator for
            the accuracy score (observed 5-tuples / expected flows).

    Returns:
        Absolute path of the generated histogram/CDF PNG, or ``None``
        when there are no valid IRGs to plot.
    """
    pcap_reader = RawPcapReader(pcap_file)
    total_reports = 0
    skipped = 0
    dropped = 0  # estimated from gaps in per-HW-ID sequence numbers
    prev_seq_no = {}  # HW ID -> last seen seq number
    five_tuple_to_prev_report_time = {}  # 5-tuple -> latest report time (ns)
    flow_with_multiple_reports = set()
    valid_irgs = []    # IRG > 0 s
    bad_irgs = []      # 0 < IRG < 0.9 s (reported too frequently)
    invalid_irgs = []  # IRG <= 0 s (non-monotonic timestamps)
    while True:
        try:
            packet_info = pcap_reader.next()
        except (EOFError, StopIteration):
            break
        # packet_info = (raw-bytes, packet-metadata)
        report_pkt = Ether(packet_info[0])
        if IntL45ReportFixed not in report_pkt:
            skipped += 1
            continue
        if IntL45LocalReport not in report_pkt:
            # TODO: handle drop and queue report
            skipped += 1
            continue
        int_fix_report = report_pkt[IntL45ReportFixed]
        int_local_report = report_pkt[IntL45LocalReport]
        # Check the sequence number: any gap means reports were lost.
        hw_id = int_fix_report.hw_id
        seq_no = int_fix_report.seq_no
        if hw_id in prev_seq_no:
            dropped += seq_no - prev_seq_no[hw_id] - 1
        prev_seq_no[hw_id] = seq_no
        # Checks the internal packet.
        # Here we skip packets that are not TCP or UDP since they can be
        # fragmented or something else.
        if TCP in int_local_report:
            internal_l4 = int_local_report[TCP]
        elif UDP in int_local_report:
            internal_l4 = int_local_report[UDP]
        else:
            skipped += 1
            continue
        # Currently we only process IPv4 packets; IPv6 could be added.
        if IP not in int_local_report:
            skipped += 1
            continue
        internal_ip = int_local_report[IP]
        five_tuple = (inet_aton(internal_ip.src) +
                      inet_aton(internal_ip.dst) +
                      int.to_bytes(internal_ip.proto, 1, "big") +
                      int.to_bytes(internal_l4.sport, 2, "big") +
                      int.to_bytes(internal_l4.dport, 2, "big"))
        if five_tuple in five_tuple_to_prev_report_time:
            prev_report_time = five_tuple_to_prev_report_time[five_tuple]
            # egress_tstamp is in nanoseconds; IRG is in seconds.
            irg = (int_local_report.egress_tstamp - prev_report_time) / 1000000000
            if irg > 0:
                valid_irgs.append(irg)
                flow_with_multiple_reports.add(five_tuple)
                if irg < 0.9:
                    bad_irgs.append(irg)
            else:
                invalid_irgs.append(irg)
        five_tuple_to_prev_report_time[five_tuple] = \
            int_local_report.egress_tstamp
        total_reports += 1
    log.info("Reports processed: {}".format(total_reports))
    log.info("Skipped packets: {}".format(skipped))
    total_five_tuples = len(five_tuple_to_prev_report_time)
    log.info("Total 5-tuples: {}".format(total_five_tuples))
    log.info("Flows with single report: {}".format(
        total_five_tuples - len(flow_with_multiple_reports)))
    log.info("Flows with multiple report: {}".format(
        len(flow_with_multiple_reports)))
    log.info("Total INT IRGs: {}".format(len(valid_irgs)))
    log.info("Total bad INT IRGs(<0.9s): {}".format(len(bad_irgs)))
    log.info("Total invalid INT IRGs(<=0s): {}".format(len(invalid_irgs)))
    log.info("Total report dropped: {}".format(dropped))
    if total_flows_from_trace != 0:
        log.info("Accuracy score: {}".format(total_five_tuples * 100 /
                                             total_flows_from_trace))
    if not valid_irgs:
        log.info("No valid IRGs")
        return None
    log.info("Efficiency score: {}".format(
        (len(valid_irgs) - len(bad_irgs)) * 100 / len(valid_irgs)))
    # Plot Histogram and CDF
    report_plot_file = abspath(splitext(pcap_file)[0] + ".png")
    if exists(report_plot_file):
        os.remove(report_plot_file)
    bin_size = 0.25  # sec
    max_val = max(np.max(valid_irgs), 3)
    percentile_of_900_msec = stats.percentileofscore(valid_irgs, 0.9)
    percentile_of_one_sec = stats.percentileofscore(valid_irgs, 1)
    percentile_of_two_sec = stats.percentileofscore(valid_irgs, 2)
    percentiles = [
        1,
        5,
        10,
        percentile_of_900_msec,
        percentile_of_one_sec,
        percentile_of_two_sec,
    ]
    vlines = np.percentile(valid_irgs, percentiles)
    bins = np.arange(0, max_val + bin_size, bin_size)
    hist, bins = np.histogram(valid_irgs, bins=bins)
    # Normalize the histogram to fractions so the CDF tops out at 1.
    hist = hist / hist.sum()
    CY = np.cumsum(hist)
    _, ax = plt.subplots(figsize=(10, 10))
    fig_y_max = percentile_of_two_sec / 100 + 0.1
    ax.set_yticks(np.arange(0, fig_y_max, 0.1))
    ax.hlines(np.arange(0, fig_y_max, 0.1), 0, 2, colors="y",
              linestyles=["dotted"])
    ax.vlines(vlines, 0, 1, colors="green", linestyles=["dotted"])
    t = int(2 / bin_size) + 1  # 2 sec -> 8+1 bins
    ax.plot(bins[:t], hist[:t])
    ax.plot(bins[:t], CY[:t], "r--")
    # Annotate each percentile marker with "(percentile%: IRG value)".
    for i, x in enumerate(vlines):
        y = percentiles[i] / 100
        ax.text(x, y, "({:.2f}%: {:.2f})".format(percentiles[i], x))
    plt.savefig(report_plot_file)
    log.info("Histogram and CDF graph can be found here: {}".format(
        report_plot_file))
    return report_plot_file
def _build_five_tuple(internal_ip, internal_l4) -> tuple:
    """Return the (src, dst, proto, sport, dport) flow key as a tuple of bytes."""
    return (
        inet_aton(internal_ip.src),
        inet_aton(internal_ip.dst),
        int.to_bytes(internal_ip.proto, 1, "big"),
        int.to_bytes(internal_l4.sport, 2, "big"),
        int.to_bytes(internal_l4.dport, 2, "big"),
    )


def _record_irg(five_tuple, enter_time, prev_time_by_flow, multi_report_flows,
                valid_irgs, bad_irgs, invalid_irgs) -> None:
    """Fold one report into the per-flow inter-report-gap (IRG) stats.

    Mutates the passed containers in place. ``enter_time`` is nanoseconds;
    IRGs are recorded in seconds. IRG > 0 counts as valid (and < 0.9 s as
    bad), IRG <= 0 as invalid.
    """
    if five_tuple in prev_time_by_flow:
        irg = (enter_time - prev_time_by_flow[five_tuple]) / 1000000000
        if irg > 0:
            valid_irgs.append(irg)
            multi_report_flows.add(five_tuple)
            if irg < 0.9:
                bad_irgs.append(irg)
        else:
            invalid_irgs.append(irg)
    prev_time_by_flow[five_tuple] = enter_time


def analysis_report_pcap(pcap_file: str,
                         total_flows_from_trace: int = 0) -> None:
    """Analyze INT local and drop reports in a pcap; log stats and plot IRGs.

    Walks every packet in ``pcap_file``, classifies INT reports as local or
    drop reports, tracks per-flow inter-report gaps for each kind, logs the
    summary, and writes "-local"/"-drop" histogram/CDF PNGs next to the pcap.

    Args:
        pcap_file: Path to the pcap file with INT reports.
        total_flows_from_trace: If non-zero, used as the denominator for
            the accuracy score (observed local-report 5-tuples / flows).
    """
    pcap_reader = RawPcapReader(pcap_file)
    skipped = 0
    dropped = 0  # estimated from gaps in per-HW-ID sequence numbers
    prev_seq_no = {}  # HW ID -> last seen seq number
    # Local report stats
    local_reports = 0
    five_tuple_to_prev_local_report_time = {}  # 5-tuple -> latest time (ns)
    flow_with_multiple_local_reports = set()
    valid_local_report_irgs = []
    bad_local_report_irgs = []
    invalid_local_report_irgs = []
    # Drop report stats
    drop_reports = 0
    five_tuple_to_prev_drop_report_time = {}  # 5-tuple -> latest time (ns)
    flow_with_multiple_drop_reports = set()
    valid_drop_report_irgs = []
    bad_drop_report_irgs = []
    invalid_drop_report_irgs = []
    pkt_processed = 0
    while True:
        try:
            packet_info = pcap_reader.next()
        except (EOFError, StopIteration):
            break
        pkt_processed += 1
        # packet_info = (raw-bytes, packet-metadata)
        report_pkt = Ether(packet_info[0])
        # Capture time in nanoseconds; assumes microsecond-resolution pcap
        # metadata (sec/usec) — TODO confirm for nanosecond-precision pcaps.
        packet_enter_time = packet_info[1].sec * 1000000000 + \
            packet_info[1].usec * 1000
        if IntL45ReportFixed not in report_pkt:
            skipped += 1
            continue
        int_fix_report = report_pkt[IntL45ReportFixed]
        if IntL45LocalReport in report_pkt:
            local_reports += 1
            int_report = report_pkt[IntL45LocalReport]
            # Local reports carry the switch egress timestamp; prefer it
            # over the capture time.
            packet_enter_time = int_report.egress_tstamp
            irg_containers = (five_tuple_to_prev_local_report_time,
                              flow_with_multiple_local_reports,
                              valid_local_report_irgs,
                              bad_local_report_irgs,
                              invalid_local_report_irgs)
        elif IntL45DropReport in report_pkt:
            drop_reports += 1
            int_report = report_pkt[IntL45DropReport]
            irg_containers = (five_tuple_to_prev_drop_report_time,
                              flow_with_multiple_drop_reports,
                              valid_drop_report_irgs,
                              bad_drop_report_irgs,
                              invalid_drop_report_irgs)
        else:
            # TODO: handle queue report
            skipped += 1
            continue
        # Check the sequence number: any gap means reports were lost.
        hw_id = int_fix_report.hw_id
        seq_no = int_fix_report.seq_no
        if hw_id in prev_seq_no:
            dropped += seq_no - prev_seq_no[hw_id] - 1
        prev_seq_no[hw_id] = seq_no
        # Currently we only process IPv4 packets; IPv6 could be added.
        if IP not in int_report:
            skipped += 1
            continue
        # Checks the internal packet.
        # Here we skip packets that are not TCP or UDP since they can be
        # fragmented or something else.
        if TCP in int_report:
            internal_l4 = int_report[TCP]
        elif UDP in int_report:
            internal_l4 = int_report[UDP]
        else:
            skipped += 1
            continue
        five_tuple = _build_five_tuple(int_report[IP], internal_l4)
        _record_irg(five_tuple, packet_enter_time, *irg_containers)
    log.info("Pkt processed: {}".format(pkt_processed))
    # Local report summary
    log.info("Local reports: {}".format(local_reports))
    log.info("Total 5-tuples: {}".format(
        len(five_tuple_to_prev_local_report_time)))
    log.info("Flows with multiple report: {}".format(
        len(flow_with_multiple_local_reports)))
    log.info("Total INT IRGs: {}".format(len(valid_local_report_irgs)))
    log.info("Total bad INT IRGs(<0.9s): {}".format(
        len(bad_local_report_irgs)))
    log.info("Total invalid INT IRGs(<=0s): {}".format(
        len(invalid_local_report_irgs)))
    if total_flows_from_trace != 0:
        log.info("Accuracy score: {}".format(
            len(five_tuple_to_prev_local_report_time) * 100 /
            total_flows_from_trace))
    if not valid_local_report_irgs:
        log.info("No valid local report IRGs")
    else:
        log.info("Efficiency score: {}".format(
            (len(valid_local_report_irgs) - len(bad_local_report_irgs)) *
            100 / len(valid_local_report_irgs)))
        # Plot Histogram and CDF
        report_plot_file = abspath(splitext(pcap_file)[0] + "-local" + ".png")
        plot_histogram_and_cdf(report_plot_file, valid_local_report_irgs)
    # Drop report summary
    log.info("----------------------")
    log.info("Drop reports: {}".format(drop_reports))
    log.info("Total 5-tuples: {}".format(
        len(five_tuple_to_prev_drop_report_time)))
    log.info("Flows with multiple report: {}".format(
        len(flow_with_multiple_drop_reports)))
    log.info("Total INT IRGs: {}".format(len(valid_drop_report_irgs)))
    log.info("Total bad INT IRGs(<0.9s): {}".format(len(bad_drop_report_irgs)))
    log.info("Total invalid INT IRGs(<=0s): {}".format(
        len(invalid_drop_report_irgs)))
    log.info("Total report dropped: {}".format(dropped))
    log.info("Skipped packets: {}".format(skipped))
    if not valid_drop_report_irgs:
        log.info("No valid drop report IRGs")
    else:
        log.info("Efficiency score: {}".format(
            (len(valid_drop_report_irgs) - len(bad_drop_report_irgs)) * 100 /
            len(valid_drop_report_irgs)))
        report_plot_file = abspath(splitext(pcap_file)[0] + "-drop" + ".png")
        plot_histogram_and_cdf(report_plot_file, valid_drop_report_irgs)