def analysis_report_pcap(pcap_file: str,
                         total_flows_from_trace: int = 0) -> "str | None":
    """Analyze INT local reports in a pcap and plot the IRG distribution.

    Walks every packet in *pcap_file*, keeps only INT local reports whose
    inner packet is IPv4 TCP/UDP, and computes per-flow inter-report gaps
    (IRGs) from the reports' egress timestamps.  Summary statistics are
    logged and a histogram/CDF plot is written next to the pcap.

    :param pcap_file: path to the pcap file containing INT reports.
    :param total_flows_from_trace: expected number of flows in the original
        trace; when non-zero, an "accuracy score" is logged.
    :return: absolute path of the generated plot image, or ``None`` when
        there are no valid IRGs to plot.  (The original annotation claimed
        ``-> str`` but a bare ``return`` produced ``None`` on that path.)
    """
    pcap_reader = RawPcapReader(pcap_file)
    total_reports = 0
    skipped = 0
    dropped = 0  # estimated report loss, based on seq number gaps
    prev_seq_no = {}  # HW ID -> last seen seq number
    five_tuple_to_prev_report_time = {}  # 5-tuple -> latest report time
    flow_with_multiple_reports = set()
    valid_irgs = []    # IRGs > 0 s
    bad_irgs = []      # IRGs in (0, 0.9) s: reports arriving too frequently
    invalid_irgs = []  # IRGs <= 0 s: timestamps did not advance

    while True:
        try:
            packet_info = pcap_reader.next()
        except (EOFError, StopIteration):
            break

        # packet_info = (raw-bytes, packet-metadata)
        report_pkt = Ether(packet_info[0])

        if IntL45ReportFixed not in report_pkt:
            skipped += 1
            continue

        if IntL45LocalReport not in report_pkt:
            # TODO: handle drop and queue report
            skipped += 1
            continue

        int_fix_report = report_pkt[IntL45ReportFixed]
        int_local_report = report_pkt[IntL45LocalReport]

        # Estimate dropped reports from gaps in the per-hw-id sequence
        # numbers.  NOTE(review): seq number wraparound is not handled.
        hw_id = int_fix_report.hw_id
        seq_no = int_fix_report.seq_no
        if hw_id in prev_seq_no:
            dropped += seq_no - prev_seq_no[hw_id] - 1
        prev_seq_no[hw_id] = seq_no

        # Checks the internal packet.
        # Skip inner packets that are not TCP or UDP since they can be
        # fragmented or something else.
        if TCP in int_local_report:
            internal_l4 = int_local_report[TCP]
        elif UDP in int_local_report:
            internal_l4 = int_local_report[UDP]
        else:
            skipped += 1
            continue

        # Currently we only process IPv4 packets; IPv6 could be added later.
        if IP not in int_local_report:
            skipped += 1
            continue

        internal_ip = int_local_report[IP]
        # Flow key: 4-byte src + 4-byte dst + 1-byte proto + 2-byte sport
        # + 2-byte dport, all big-endian, concatenated into one bytes key.
        five_tuple = (inet_aton(internal_ip.src) + inet_aton(internal_ip.dst) +
                      int.to_bytes(internal_ip.proto, 1, "big") +
                      int.to_bytes(internal_l4.sport, 2, "big") +
                      int.to_bytes(internal_l4.dport, 2, "big"))

        if five_tuple in five_tuple_to_prev_report_time:
            prev_report_time = five_tuple_to_prev_report_time[five_tuple]
            # Inter-report gap in seconds; egress_tstamp is presumably in
            # nanoseconds (TODO confirm against the INT report spec).
            irg = (int_local_report.egress_tstamp -
                   prev_report_time) / 1000000000
            if irg > 0:
                valid_irgs.append(irg)
            flow_with_multiple_reports.add(five_tuple)

            if 0 < irg < 0.9:
                bad_irgs.append(irg)
            if irg <= 0:
                invalid_irgs.append(irg)

        five_tuple_to_prev_report_time[
            five_tuple] = int_local_report.egress_tstamp
        total_reports += 1

    log.info("Reports processed: {}".format(total_reports))
    log.info("Skipped packets: {}".format(skipped))
    total_five_tuples = len(five_tuple_to_prev_report_time)
    log.info("Total 5-tuples: {}".format(total_five_tuples))
    log.info(
        "Flows with single report: {}".format(total_five_tuples -
                                              len(flow_with_multiple_reports)))
    log.info("Flows with multiple report: {}".format(
        len(flow_with_multiple_reports)))
    log.info("Total INT IRGs: {}".format(len(valid_irgs)))
    log.info("Total bad INT IRGs(<0.9s): {}".format(len(bad_irgs)))
    log.info("Total invalid INT IRGs(<=0s): {}".format(len(invalid_irgs)))
    log.info("Total report dropped: {}".format(dropped))

    if total_flows_from_trace != 0:
        log.info("Accuracy score: {}".format(total_five_tuples * 100 /
                                             total_flows_from_trace))

    if len(valid_irgs) <= 0:
        log.info("No valid IRGs")
        return None

    # Efficiency: fraction of IRGs that are NOT "bad" (i.e. >= 0.9 s apart).
    log.info("Efficiency score: {}".format(
        (len(valid_irgs) - len(bad_irgs)) * 100 / len(valid_irgs)))

    # Plot Histogram and CDF next to the pcap, replacing any stale image.
    report_plot_file = abspath(splitext(pcap_file)[0] + ".png")
    if exists(report_plot_file):
        os.remove(report_plot_file)
    bin_size = 0.25  # sec
    max_val = max(np.max(valid_irgs), 3)
    percentile_of_900_msec = stats.percentileofscore(valid_irgs, 0.9)
    percentile_of_one_sec = stats.percentileofscore(valid_irgs, 1)
    percentile_of_two_sec = stats.percentileofscore(valid_irgs, 2)
    percentiles = [
        1,
        5,
        10,
        percentile_of_900_msec,
        percentile_of_one_sec,
        percentile_of_two_sec,
    ]
    # x-positions of the vertical marker lines for each percentile above.
    vlines = np.percentile(valid_irgs, percentiles)

    bins = np.arange(0, max_val + bin_size, bin_size)
    hist, bins = np.histogram(valid_irgs, bins=bins)

    # Normalize counts to fractions (0..1) of the total.
    hist = hist / hist.sum()

    CY = np.cumsum(hist)  # empirical CDF over the histogram bins

    _, ax = plt.subplots(figsize=(10, 10))

    fig_y_max = percentile_of_two_sec / 100 + 0.1
    ax.set_yticks(np.arange(0, fig_y_max, 0.1))
    ax.hlines(np.arange(0, fig_y_max, 0.1),
              0,
              2,
              colors="y",
              linestyles=["dotted"])
    ax.vlines(vlines, 0, 1, colors="green", linestyles=["dotted"])

    # Only plot the first 2 seconds of the distribution.
    t = int(2 / bin_size) + 1  # 2 sec -> 8+1 bins
    ax.plot(bins[:t], hist[:t])
    ax.plot(bins[:t], CY[:t], "r--")

    # Label each percentile marker with "(percent%: value)".
    for i in range(0, len(vlines)):
        x = vlines[i]
        y = percentiles[i] / 100
        ax.text(x, y, "({:.2f}%: {:.2f})".format(percentiles[i], x))

    plt.savefig(report_plot_file)
    log.info("Histogram and CDF graph can be found here: {}".format(
        report_plot_file))
    return report_plot_file
# Example #2 ("示例#2"): an alternative implementation of
# analysis_report_pcap that also handles INT drop reports.  NOTE: defining
# it below shadows the implementation above when this module is imported.
def analysis_report_pcap(pcap_file: str,
                         total_flows_from_trace: int = 0) -> None:
    """Analyze INT local AND drop reports in a pcap; log and plot IRG stats.

    Walks every packet in *pcap_file* and classifies each INT report as a
    local report or a drop report, tracking per-flow inter-report gaps
    (IRGs) separately for each type.  Summary statistics are logged and a
    histogram/CDF plot per report type is produced via
    ``plot_histogram_and_cdf``.

    NOTE(review): this file defines ``analysis_report_pcap`` twice; this
    later definition shadows the earlier one at import time.

    :param pcap_file: path to the pcap file containing INT reports.
    :param total_flows_from_trace: expected number of flows in the original
        trace; when non-zero, an "accuracy score" is logged.
    """
    pcap_reader = RawPcapReader(pcap_file)
    skipped = 0
    dropped = 0  # estimated report loss, based on seq number gaps
    prev_seq_no = {}  # HW ID -> last seen seq number

    # Local report state
    local_reports = 0
    five_tuple_to_prev_local_report_time = {}  # 5-tuple -> latest report time
    flow_with_multiple_local_reports = set()
    valid_local_report_irgs = []
    bad_local_report_irgs = []
    invalid_local_report_irgs = []

    # Drop report state
    drop_reports = 0
    five_tuple_to_prev_drop_report_time = {}  # 5-tuple -> latest report time
    flow_with_multiple_drop_reports = set()
    valid_drop_report_irgs = []
    bad_drop_report_irgs = []
    invalid_drop_report_irgs = []
    pkt_processed = 0
    while True:
        try:
            packet_info = pcap_reader.next()
        except (EOFError, StopIteration):
            break
        pkt_processed += 1

        # packet_info = (raw-bytes, packet-metadata)
        report_pkt = Ether(packet_info[0])
        # Packet capture time in nanoseconds.  NOTE(review): assumes the
        # metadata's `usec` field holds microseconds — confirm for
        # nanosecond-resolution pcaps.
        packet_enter_time = packet_info[1].sec * 1000000000 + packet_info[
            1].usec * 1000

        if IntL45ReportFixed not in report_pkt:
            skipped += 1
            continue

        int_fix_report = report_pkt[IntL45ReportFixed]
        # Bind the per-type accumulators via aliases so the shared logic
        # below works for both local and drop reports.  Mutations through
        # the aliases update the underlying per-type containers.
        if IntL45LocalReport in report_pkt:
            local_reports += 1
            int_report = report_pkt[IntL45LocalReport]
            # Local reports carry an egress timestamp; prefer it over the
            # capture time.
            packet_enter_time = int_report.egress_tstamp
            five_tuple_to_prev_report_time = five_tuple_to_prev_local_report_time
            flow_with_multiple_reports = flow_with_multiple_local_reports
            valid_report_irgs = valid_local_report_irgs
            bad_report_irgs = bad_local_report_irgs
            invalid_report_irgs = invalid_local_report_irgs
        elif IntL45DropReport in report_pkt:
            drop_reports += 1
            int_report = report_pkt[IntL45DropReport]
            five_tuple_to_prev_report_time = five_tuple_to_prev_drop_report_time
            flow_with_multiple_reports = flow_with_multiple_drop_reports
            valid_report_irgs = valid_drop_report_irgs
            bad_report_irgs = bad_drop_report_irgs
            invalid_report_irgs = invalid_drop_report_irgs
        else:
            # TODO: handle queue report
            skipped += 1
            continue

        # Estimate dropped reports from gaps in the per-hw-id sequence
        # numbers.  NOTE(review): seq number wraparound is not handled.
        hw_id = int_fix_report.hw_id
        seq_no = int_fix_report.seq_no
        if hw_id in prev_seq_no:
            dropped += seq_no - prev_seq_no[hw_id] - 1
        prev_seq_no[hw_id] = seq_no

        # Currently we only process IPv4 packets; IPv6 could be added later.
        if IP not in int_report:
            skipped += 1
            continue

        # Checks the internal packet.
        # Skip inner packets that are not TCP or UDP since they can be
        # fragmented or something else.
        if TCP in int_report:
            internal_l4 = int_report[TCP]
        elif UDP in int_report:
            internal_l4 = int_report[UDP]
        else:
            skipped += 1
            continue

        internal_ip = int_report[IP]
        # Flow key: (src, dst, proto, sport, dport), each as big-endian bytes.
        five_tuple = (
            inet_aton(internal_ip.src),
            inet_aton(internal_ip.dst),
            int.to_bytes(internal_ip.proto, 1, "big"),
            int.to_bytes(internal_l4.sport, 2, "big"),
            int.to_bytes(internal_l4.dport, 2, "big"),
        )

        if five_tuple in five_tuple_to_prev_report_time:
            prev_report_time = five_tuple_to_prev_report_time[five_tuple]
            # Inter-report gap in seconds (timestamps are in nanoseconds).
            irg = (packet_enter_time - prev_report_time) / 1000000000
            if irg > 0:
                valid_report_irgs.append(irg)
            flow_with_multiple_reports.add(five_tuple)

            if 0 < irg < 0.9:
                bad_report_irgs.append(irg)
            if irg <= 0:
                invalid_report_irgs.append(irg)

        five_tuple_to_prev_report_time[five_tuple] = packet_enter_time

    log.info("Pkt processed: {}".format(pkt_processed))
    # Local report summary
    log.info("Local reports: {}".format(local_reports))
    log.info("Total 5-tuples: {}".format(
        len(five_tuple_to_prev_local_report_time)))
    log.info("Flows with multiple report: {}".format(
        len(flow_with_multiple_local_reports)))
    log.info("Total INT IRGs: {}".format(len(valid_local_report_irgs)))
    log.info("Total bad INT IRGs(<0.9s): {}".format(
        len(bad_local_report_irgs)))
    log.info("Total invalid INT IRGs(<=0s): {}".format(
        len(invalid_local_report_irgs)))
    if total_flows_from_trace != 0:
        log.info("Accuracy score: {}".format(
            len(five_tuple_to_prev_local_report_time) * 100 /
            total_flows_from_trace))

    if len(valid_local_report_irgs) <= 0:
        log.info("No valid local report IRGs")
    else:
        # Efficiency: fraction of IRGs that are NOT "bad" (>= 0.9 s apart).
        log.info("Efficiency score: {}".format(
            (len(valid_local_report_irgs) - len(bad_local_report_irgs)) * 100 /
            len(valid_local_report_irgs)))
        # Plot Histogram and CDF
        report_plot_file = abspath(splitext(pcap_file)[0] + "-local" + ".png")
        plot_histogram_and_cdf(report_plot_file, valid_local_report_irgs)

    # Drop report summary
    log.info("----------------------")
    log.info("Drop reports: {}".format(drop_reports))
    log.info("Total 5-tuples: {}".format(
        len(five_tuple_to_prev_drop_report_time)))
    log.info("Flows with multiple report: {}".format(
        len(flow_with_multiple_drop_reports)))
    log.info("Total INT IRGs: {}".format(len(valid_drop_report_irgs)))
    log.info("Total bad INT IRGs(<0.9s): {}".format(len(bad_drop_report_irgs)))
    log.info("Total invalid INT IRGs(<=0s): {}".format(
        len(invalid_drop_report_irgs)))
    log.info("Total report dropped: {}".format(dropped))
    log.info("Skipped packets: {}".format(skipped))

    if len(valid_drop_report_irgs) <= 0:
        log.info("No valid drop report IRGs")
    else:
        log.info("Efficiency score: {}".format(
            (len(valid_drop_report_irgs) - len(bad_drop_report_irgs)) * 100 /
            len(valid_drop_report_irgs)))
        report_plot_file = abspath(splitext(pcap_file)[0] + "-drop" + ".png")
        plot_histogram_and_cdf(report_plot_file, valid_drop_report_irgs)