示例#1
0
def generate_mean_link_utilization_over_time_plot(parameter_name, parameter_value, trials):
    """
    Scatter-plot, per trial, the mean normalized link utilization at every snapshot.

    Only links whose first endpoint is "of:0000000000000001" are considered. Each
    utilization sample is capped at the path capacity and divided by it before the
    per-snapshot mean across links is taken.

    parameter_name and parameter_value are only used to build the output file name;
    trials is iterated once and each element must expose `name` and `get_parameter`.

    Raises StopIteration if a trial has no matching link, and IndexError if a link
    has fewer snapshots than the first matching link.
    """
    path_capacity = 50.0
    source_switch = "of:0000000000000001"
    for trial_idx, the_trial in enumerate(trials):
        print(f"generate_mean_utilization_over_time_plot: {trial_idx}, {the_trial.name}")
        raw_utilization = the_trial.get_parameter("link-utilization-over-time")

        # Keep only the links that leave the source switch.
        samples_per_link = {}
        for link_tuple, samples in link_tuple_to_util_list(raw_utilization).items():
            if link_tuple[0] == source_switch:
                samples_per_link[link_tuple] = samples

        # Cap every sample at the path capacity and express it as a fraction of it.
        normalized_per_link = {}
        for link_tuple, samples in samples_per_link.items():
            normalized_per_link[link_tuple] = [
                min(path_capacity, sample) / path_capacity for sample in samples
            ]

        # The snapshot count is taken from the first link; every link is assumed to
        # have been sampled the same number of times.
        mean_utilization_over_time = [
            np.mean([series[time_idx] for series in normalized_per_link.values()])
            for time_idx in range(len(next(iter(samples_per_link.values()))))
        ]
        xs = list(range(len(next(iter(samples_per_link.values())))))
        helpers.plot_a_scatter(xs, mean_utilization_over_time, idx=trial_idx,
                               label=the_trial.name)

    helpers.xlabel(helpers.axis_label_font("Time"))
    helpers.ylabel(helpers.axis_label_font("Mean link utilization"))
    helpers.save_figure(f"mean-utilization-over-time-{parameter_name}-{parameter_value}.pdf", num_cols=3)
def generate_learning_rate_plots():
    """
    Plot the "step" and "inverse" learning-rate schedules over training iterations.

    The schedules follow these formulas:
      - step: base_lr * gamma ^ (floor(iter / step))
      - inv:  base_lr * (1 + gamma * iter) ^ (- power)
    The result is saved as learning-rate.pdf inside figure_output_dir.
    """
    base_lr = 0.01
    gamma_step = 0.9999
    gamma_inv = 0.0001
    step = 1
    power = 0.75

    def inverse_schedule(iteration):
        return base_lr * (1 + gamma_inv * iteration)**(-power)

    def step_schedule(iteration):
        return base_lr * (gamma_step**(floor(iteration / step)))

    # Sample every 10th iteration from 1 up to 50000.
    xs = list(range(1, 50001, 10))
    inv_learning_rate = [inverse_schedule(iteration) for iteration in xs]
    step_learning_rate = [step_schedule(iteration) for iteration in xs]

    schedules = (
        (step_learning_rate, "Step", 6),
        (inv_learning_rate, "Inverse", 7),
    )
    for learning_rates, schedule_label, style_idx in schedules:
        helpers.plot_a_line(xs,
                            learning_rates,
                            label=schedule_label,
                            idx=style_idx,
                            plot_markers=False)
    helpers.xlabel("Training Iteration")
    helpers.ylabel("Learning Rate")
    helpers.save_figure(figure_output_dir / "learning-rate.pdf", num_cols=2)
示例#3
0
def generate_mean_throughput_over_time_plot(parameter_name, parameter_value, trials):
    """
    Scatter-plot, per trial, the throughput summed across links at every snapshot.

    Only links whose first endpoint is "of:0000000000000001" are considered and each
    sample is capped at the path capacity before the per-snapshot sum is taken.

    NOTE(review): the plotted value is the *sum* across links, while the function
    name and the y-axis label say "mean" -- confirm which one is intended.

    parameter_name and parameter_value are only used to build the output file name.
    Raises StopIteration if a trial has no matching link.
    """
    path_capacity = 50.0
    source_switch = "of:0000000000000001"
    for trial_idx, the_trial in enumerate(trials):
        print(f"generate_mean_throughput_over_time: {trial_idx}, {the_trial.name}")
        raw_utilization = the_trial.get_parameter("link-utilization-over-time")

        # Keep only the links that leave the source switch.
        samples_per_link = {}
        for link_tuple, samples in link_tuple_to_util_list(raw_utilization).items():
            if link_tuple[0] == source_switch:
                samples_per_link[link_tuple] = samples

        # Cap every sample at the path capacity.
        capped_per_link = {}
        for link_tuple, samples in samples_per_link.items():
            capped_per_link[link_tuple] = [min(path_capacity, sample) for sample in samples]

        # The snapshot count is taken from the first link.
        throughputs_over_time = [
            sum(series[time_idx] for series in capped_per_link.values())
            for time_idx in range(len(next(iter(samples_per_link.values()))))
        ]
        xs = list(range(len(next(iter(samples_per_link.values())))))
        helpers.plot_a_scatter(xs, throughputs_over_time, idx=trial_idx,
                               label=the_trial.name)

    helpers.xlabel(helpers.axis_label_font("Time"))
    helpers.ylabel(helpers.axis_label_font("Mean throughput (Mi-bps)"))
    helpers.save_figure(f"throughput-over-time-{parameter_name}-{parameter_value}.pdf", num_cols=3)
def generate_per_class_accuracy_bar_plot():
    """
    Draw a two-panel figure: class frequencies of the undersampled dataset on the
    upper axis and per-class validation accuracy for two confusion matrices on the
    lower axis.

    The confusion matrices are read from the names `more_dropout` and `undersampled`
    defined outside this function; the class frequencies come from
    ./segments-undersampled.json. The figure is saved as per-class-error-bar-plot.pdf
    inside figure_output_dir.
    """
    def per_class_accuracy(conf_mat):
        # Diagonal entry over the sum returned by get_samples_that_are, as a percentage.
        return [
            100 *
            (conf_mat[idx][idx] / sum(get_samples_that_are(conf_mat, idx + 1)))
            for idx in range(7)
        ]

    augmented_conf_mat = more_dropout
    undersampled_conf_mat = undersampled

    fig, (ax1, ax2) = plt.subplots(2)

    bar_width = 0.1
    # Seven bar groups spaced 0.3 apart; the second series sits one bar width to the right.
    bar_1_xs = np.arange(0.0, 0.3 * 6.5, 0.3)
    bar_2_xs = [x_i + bar_width for x_i in bar_1_xs]
    print(bar_1_xs, bar_2_xs)

    bar_1_ys = per_class_accuracy(augmented_conf_mat)
    bar_2_ys = per_class_accuracy(undersampled_conf_mat)

    # Centre each tick between the two bars of a group.
    tick_positions = [x_i + 0.5 * bar_width for x_i in bar_1_xs]
    plt.xticks(tick_positions, [1, 2, 3, 4, 5, 6, "Outlier"])
    accuracy_series = (
        (bar_1_xs, bar_1_ys, 0, "Augmented Dataset"),
        (bar_2_xs, bar_2_ys, 1, "Undersampled Dataset"),
    )
    for series_xs, series_ys, style_idx, series_label in accuracy_series:
        helpers.plot_a_bar(series_xs,
                           series_ys,
                           idx=style_idx,
                           label=series_label,
                           bar_width=bar_width,
                           axis_to_plot_on=ax2)
    helpers.xlabel("Class label")
    helpers.ylabel(r"Validation Accuracy (\%)", formatter=lambda x: x, ax=ax2)

    # Upper panel: relative frequency of every segment type in the undersampled dataset.
    dataset_text = path.Path("./segments-undersampled.json").read_text()
    undersampled_dataset = json.loads(dataset_text)
    histogram = Counter(
        d_i["data"]["segment_type"]["data"]
        for d_i in undersampled_dataset.values())
    xs = list(range(1, 8))
    total_samples = sum(histogram.values())
    ys = [count / total_samples for _, count in sorted(histogram.items())]
    helpers.plot_a_bar(xs, ys, idx=1, axis_to_plot_on=ax1, label_data=False)
    helpers.ylabel(r"$\mathbb{P}\{x = \mathcal{X}\}$",
                   formatter=lambda x: x,
                   ax=ax1)
    ax1.set_yticks([0.1, 0.2, 0.3])
    ax1.grid(**cfg.GRID)
    ax1.xaxis.set_ticklabels([])

    # One figure-level legend placed above both panels.
    legend_params = deepcopy(cfg.LEGEND)
    legend_params["bbox_to_anchor"] = (0.5, 0.975)
    fig.legend(**legend_params, ncol=2)
    helpers.save_figure(figure_output_dir / "per-class-error-bar-plot.pdf",
                        no_legend=True)
def augment_vs_no_augment():
    """
    Plot validation error (in percent) over training iterations for a run trained
    with dataset augmentation against a run trained on the original dataset.

    Both curves share the iteration axis taken from the augmented run's log. The
    figure is saved as dataset-augmentation-comparison.pdf inside figure_output_dir.
    """
    def validation_error_percent(records):
        return [100 * (1 - d_i["accuracy"]) for d_i in records]

    # parse_log returns a pair; only the second element (validation records) is plotted.
    _, augment = parse_log(str(log_file_dir / "training-with-flip-dataset.log"))
    _, no_augment = parse_log(str(log_file_dir / "with-dropout.log"))

    xs = [d_i["NumIters"] for d_i in augment]
    curves = (
        (validation_error_percent(augment), "Dataset augmentations", 4),
        (validation_error_percent(no_augment), "Original dataset", 5),
    )
    for error_values, curve_label, style_idx in curves:
        helpers.plot_a_line(xs,
                            error_values,
                            label=curve_label,
                            idx=style_idx,
                            plot_markers=False)
    plt.ylim(7.5, 20.0)

    helpers.xlabel("Training Iteration")
    helpers.ylabel(r"Validation Error (\%)")
    output_file = figure_output_dir / "dataset-augmentation-comparison.pdf"
    helpers.save_figure(str(output_file), num_cols=2)
def dropout_vs_no_dropout_plot():
    """
    Plot validation error (in percent) over training iterations for three runs
    labelled with dropout probabilities 0.0, 0.5 and 0.75.

    All curves share the iteration axis taken from the with-dropout log. The figure
    is saved as dropout-comparison.pdf inside figure_output_dir.
    """
    def load_validation_records(log_name):
        # parse_log returns a pair; only the second element is used here.
        _, validation_records = parse_log(str(log_file_dir / log_name))
        return validation_records

    def validation_error_percent(records):
        return [100 * (1 - d_i["accuracy"]) for d_i in records]

    dropout = load_validation_records("with-dropout.log")
    no_dropout = load_validation_records("no-dropout.log")
    more_dropout = load_validation_records("dropout-0.75.log")

    xs = [d_i["NumIters"] for d_i in dropout]
    dropout_ys = validation_error_percent(dropout)
    no_dropout_ys = validation_error_percent(no_dropout)
    more_dropout_ys = validation_error_percent(more_dropout)

    curves = (
        (no_dropout_ys, r"$\mathbb{P}\{\text{dropout}\} = 0.0$", 3),
        (dropout_ys, r"$\mathbb{P}\{\text{dropout}\} = 0.5$", 2),
        (more_dropout_ys, r"$\mathbb{P}\{\text{dropout}\} = 0.75$", 4),
    )
    for error_values, curve_label, style_idx in curves:
        helpers.plot_a_line(xs,
                            error_values,
                            label=curve_label,
                            idx=style_idx,
                            plot_markers=False)
    helpers.ylim((5, 30))
    helpers.xlabel("Training Iteration")
    helpers.ylabel(r"Validation Error (\%)")

    helpers.save_figure(str(figure_output_dir / "dropout-comparison.pdf"),
                        num_cols=2)
def generate_learning_rate_comparison_plot():
    """
    Draw a two-panel comparison of the "step" and "inverse" learning-rate policies.

    The upper panel shows the learning-rate schedules themselves:
      - step: base_lr * gamma ^ (floor(iter / step))
      - inv:  base_lr * (1 + gamma * iter) ^ (- power)
    The lower panel shows the validation error (in percent) of the runs logged in
    lr-step-training.log (step) and training-with-flip-dataset.log (inverse).

    In both panels the step policy is drawn with style idx=6 and the inverse policy
    with idx=7, so the single figure-level legend applies to both panels. The figure
    is saved as learning-rate-comparison.pdf inside figure_output_dir.
    """
    _, validation_data_step = parse_log(
        str(log_file_dir / "lr-step-training.log"))
    _, validation_data_inv = parse_log(
        str(log_file_dir / "training-with-flip-dataset.log"))

    fig, (plot1, plot2) = plt.subplots(2)

    # Lower panel: validation error of each run, sharing the step run's iteration axis.
    xs = [d_i["NumIters"] for d_i in validation_data_step]
    ys_step = [100 * (1 - d_i["accuracy"]) for d_i in validation_data_step]
    ys_inv = [100 * (1 - d_i["accuracy"]) for d_i in validation_data_inv]
    plot2.set_ylim(7.5, 20)
    helpers.xlabel("Training Iterations", ax=plot2)
    helpers.ylabel(r"Validation Error (\%)", ax=plot2, formatter=lambda x: x)
    # Each series is paired with its own label and with the same style index that the
    # matching schedule uses in the upper panel.
    helpers.plot_a_line(xs,
                        ys_step,
                        label="Step",
                        plot_markers=False,
                        idx=6,
                        axis_to_plot_on=plot2)
    helpers.plot_a_line(xs,
                        ys_inv,
                        label="Inverse",
                        plot_markers=False,
                        idx=7,
                        axis_to_plot_on=plot2)

    # Upper panel: the schedules, sampled every 10th iteration from 1 up to 50000.
    base_lr = 0.01
    gamma_step = 0.9999
    gamma_inv = 0.0001
    step = 1
    power = 0.75
    xs = list(range(1, 50001, 10))
    inv_learning_rate = [
        base_lr * (1 + gamma_inv * iteration)**(-power) for iteration in xs
    ]
    step_learning_rate = [
        base_lr * (gamma_step**(floor(iteration / step))) for iteration in xs
    ]
    helpers.plot_a_line(xs,
                        step_learning_rate,
                        idx=6,
                        plot_markers=False,
                        axis_to_plot_on=plot1)
    helpers.plot_a_line(xs,
                        inv_learning_rate,
                        idx=7,
                        plot_markers=False,
                        axis_to_plot_on=plot1)
    plot1.xaxis.set_ticklabels([])
    plot1.grid(**cfg.GRID)
    helpers.ylabel("Learning Rate", ax=plot1, formatter=lambda x: x)

    # One figure-level legend placed above both panels.
    legend_params = deepcopy(cfg.LEGEND)
    legend_params["bbox_to_anchor"] = (0.5, 0.975)
    fig.legend(ncol=2, **legend_params)
    helpers.save_figure(figure_output_dir / "learning-rate-comparison.pdf",
                        no_legend=True)
def generate_dataset_histogram(dataset, output_path):
    """
    Plot the relative frequency of every segment type found in `dataset` as a bar
    chart and save it to `output_path`.

    `dataset` is a mapping whose values carry the segment type under
    ["data"]["segment_type"]["data"]. Bars are drawn at x = 1..7 in sorted order of
    the segment type.
    """
    histogram = Counter(
        d_i["data"]["segment_type"]["data"] for d_i in dataset.values())
    total_samples = sum(histogram.values())
    xs = list(range(1, 8))
    ys = [count / total_samples for _, count in sorted(histogram.items())]
    plt.xticks(xs, [1, 2, 3, 4, 5, 6, "Outlier"])
    helpers.ylabel(r"$\mathbb{P}\{x = \mathcal{X}\}$")
    helpers.xlabel("Class Label")

    helpers.plot_a_bar(xs, ys, idx=1)
    helpers.save_figure(output_path, no_legend=True)
def generate_step_vs_inv_learning_rate():
    """
    Plot validation error (in percent) over training iterations for the run logged in
    lr-step-training.log ("Step") against the run logged in
    training-with-flip-dataset.log ("Inverse").

    Both curves share the iteration axis taken from the step run. The figure is saved
    as learning-rate-comparison.pdf inside figure_output_dir.
    """
    _, validation_data_step = parse_log(
        str(log_file_dir / "lr-step-training.log"))
    _, validation_data_inv = parse_log(
        str(log_file_dir / "training-with-flip-dataset.log"))

    xs = [d_i["NumIters"] for d_i in validation_data_step]
    # Validation error in percent, derived from each record's accuracy. These two
    # series must be computed here; they are not defined anywhere else in the function.
    ys_step = [100 * (1 - d_i["accuracy"]) for d_i in validation_data_step]
    ys_inv = [100 * (1 - d_i["accuracy"]) for d_i in validation_data_inv]

    helpers.plot_a_line(xs, ys_step, label="Step", plot_markers=False, idx=6)
    helpers.plot_a_line(xs, ys_inv, label="Inverse", plot_markers=False, idx=7)
    plt.ylim(7.5, 20)
    helpers.xlabel("Training Iterations")
    helpers.ylabel(r"Validation Error (\%)")

    helpers.save_figure(figure_output_dir / "learning-rate-comparison.pdf",
                        num_cols=2)
示例#10
0
def generate_link_utilization_cdf(parameter_name, parameter_value, trials):
    """
    Generate a CDF that shows the mean utilization of each link for every trial in the
    provider.

    For each trial the values of its "measured-link-utilization" parameter are divided
    by the link capacity, sorted and plotted as one CDF labelled with the trial name.

    parameter_name and parameter_value are only used to build the output file name.
    trials may be any iterable (it is iterated exactly once); each element must expose
    `name` and `get_parameter`.
    """
    link_capacity = 50.0  # Mi-bps
    # Counted while iterating so that `trials` does not need to support len().
    num_trials = 0
    for idx, trial in enumerate(trials):
        num_trials += 1
        print(f"generate_link_utilization_cdf: {idx}, {trial.name}")

        mean_network_utilization = trial.get_parameter("measured-link-utilization")
        link_utilizations = sorted(link_throughput / link_capacity
                                   for link_throughput
                                   in mean_network_utilization.values())
        helpers.plot_a_cdf(link_utilizations, label=trial.name, idx=idx)

    helpers.xlabel(helpers.axis_label_font("Link Utilization"))
    helpers.ylabel(helpers.axis_label_font(r"$\mathbb{P}\{x < \mathcal{X}\}$"))
    # Half as many legend columns as trials, but never fewer than one: a column count
    # of zero (single trial) is not a valid legend layout.
    plt.legend(ncol=max(1, num_trials // 2), **cfg.LEGEND)
    helpers.save_figure(f"link-utilization-cdf-{parameter_name}-{parameter_value}.pdf", no_legend=True)
示例#11
0
def generate_per_path_packet_loss_cdf(parameter_name, parameter_value, trials):
    """
    For each trial generate a CDF of the per-flow-id packet loss rate, i.e.
    (total packets sent - total packets received) / total packets sent.

    Sender records are grouped by their "flow_id". For every sender record in a group
    the receiver-side packet counts whose key has the sender's "src_port" at index 1
    are added to the group's received total. Groups that sent no packets have no
    defined loss rate and are skipped (with a message) instead of raising
    ZeroDivisionError.

    parameter_name and parameter_value are only used to build the output file name;
    each element of trials must expose `name` and `get_parameter`.
    """
    def flow_id_selector(sender_record):
        return sender_record["flow_id"]

    for trial_idx, the_trial in enumerate(trials):
        print(f"generate_per_packet_loss_cdf: {trial_idx}, {the_trial.name}")
        end_host_results = the_trial.get_parameter("end-host-results")
        sender_results = end_host_results[0]["sender"]
        receiver_results = end_host_results[1]["receiver"]

        # Index the received packet counts by source port once per trial instead of
        # rescanning every receiver entry for every sender record.
        received_by_source_port = {}
        for receiver_info, packet_count in receiver_results.items():
            source_port = receiver_info[1]
            received_by_source_port[source_port] = \
                received_by_source_port.get(source_port, 0) + packet_count

        link_loss_rates = []
        # itertools.groupby only merges adjacent items, hence the sort on the same key.
        sorted_senders = sorted(sender_results.values(), key=flow_id_selector)
        for flow_id, flows_with_id in itertools.groupby(sorted_senders, flow_id_selector):
            total_sender_packets_for_path = 0
            total_receiver_packets_for_path = 0
            for the_flow in flows_with_id:
                total_sender_packets_for_path += the_flow["pkt_count"]
                total_receiver_packets_for_path += \
                    received_by_source_port.get(the_flow["src_port"], 0)

            if total_sender_packets_for_path == 0:
                print(f"generate_per_packet_loss_cdf: flow_id {flow_id} sent no packets, skipping")
                continue

            link_loss_rate = (total_sender_packets_for_path - total_receiver_packets_for_path) \
                    / total_sender_packets_for_path
            link_loss_rates.append(link_loss_rate)

        helpers.plot_a_cdf(sorted(link_loss_rates), idx=trial_idx, label=the_trial.name)

    helpers.xlabel(helpers.axis_label_font("Packet Loss Rate"))
    helpers.ylabel(helpers.axis_label_font(r"$\mathbb{P}\{x \leq \mathcal{X}\}$"))
    helpers.save_figure(f"per-path-loss-cdf-{parameter_name}-{parameter_value}.pdf",
            num_cols=3)
def generate_learning_curve(training_data, validation_data, plot_name):
    """
    Plot training loss and validation accuracy against the iteration count on a
    shared x-axis with two y-axes, and save the figure to `plot_name`.

    training_data is a sequence of records with "NumIters" and "loss";
    validation_data is a sequence of records with "NumIters" and "accuracy"
    (the accuracy is multiplied by 100 before plotting).
    """
    train_iterations = [d_i["NumIters"] for d_i in training_data]
    train_losses = [d_i["loss"] for d_i in training_data]

    # Left axis: training loss.
    fig, ax1 = plt.subplots()
    first_line = helpers.plot_a_line(train_iterations,
                                     train_losses,
                                     idx=0,
                                     label="training log loss",
                                     plot_markers=False,
                                     axis_to_plot_on=ax1)
    ax1.set_yticks(list(np.arange(0.0, 2.6, 0.5)))

    helpers.xlabel("Iteration", ax=ax1)
    helpers.ylabel("Training loss", ax=ax1)

    # Right axis: validation accuracy in percent.
    ax2 = ax1.twinx()
    helpers.ylabel(r"Validation accuracy (\%)", ax=ax2)
    validation_iterations = [d_i["NumIters"] for d_i in validation_data]
    validation_accuracies = [d_i["accuracy"] * 100 for d_i in validation_data]
    second_line = helpers.plot_a_line(validation_iterations,
                                      validation_accuracies,
                                      idx=1,
                                      label="validation accuracy",
                                      plot_markers=False,
                                      axis_to_plot_on=ax2)
    ax2.set_ylim((82, 93))
    ax2.set_yticks(list(range(82, 94)))

    # The two lines live on different axes, so the legend is built explicitly from
    # both returned handles.
    legend_handles = first_line + second_line
    legend_labels = ["training log loss", "validation accuracy"]
    ax1.legend(legend_handles, legend_labels, ncol=2, **cfg.LEGEND)
    helpers.save_figure(plot_name, num_cols=2, no_legend=True)
示例#13
0
def generate_data_recovery_vs_param_plot(trial_provider, param_name,
                                         x_axis_label):
    """
    Scatter-plot, per attacker type, the mean percentage of recovered messages
    against the value of the trial parameter `param_name`.

    Trials are grouped by their `param_name` value. Within a group the percentage is
    len(recovered messages) / "sim_duration" * 100 for every trial and attacker type;
    the group mean is plotted at x = parameter value / 5.0. The figure is saved as
    attacker-simulation.pdf.

    NOTE(review): `x_axis_label` is applied and then immediately replaced by the
    hard-coded label "Delay to hop period ratio" -- confirm which one is intended.
    """
    def param_selector(t_i):
        return t_i.get_parameter(param_name)

    attacker_types = [
        "random-path-hopping",
        "random-node-hopping",
        "one-node-per-path",
        "fixed",
        "planned",
    ]
    # itertools.groupby only merges adjacent items, hence the sort on the same key.
    sorted_trials = sorted(trial_provider, key=param_selector)
    xs = []
    means = defaultdict(list)
    errs = defaultdict(list)
    fig, ax = plt.subplots()
    for param_value, param_group in itertools.groupby(sorted_trials,
                                                      key=param_selector):
        percent_recovered = defaultdict(list)
        for trial in list(param_group):
            total_messages_sent = trial.get_parameter("sim_duration")
            attacker_data = {
                attacker_type: trial.get_parameter(
                    f"{attacker_type}-attacker-recovered-messages")
                for attacker_type in attacker_types
            }
            for attacker_type, recovered_messages in attacker_data.items():
                print(
                    f"{attacker_type} recoverd {len(recovered_messages)} out of {total_messages_sent} messages"
                )
                percent_recovered[attacker_type].append(
                    (len(recovered_messages) / total_messages_sent) * 100)

        xs.append(param_value / 5.0)
        for attacker_type, y_vals in percent_recovered.items():
            means[attacker_type].append(np.mean(y_vals))
            # Standard deviations are collected but not plotted.
            errs[attacker_type].append(np.std(y_vals))

    for plot_idx, (attacker_type, attacker_means) in enumerate(means.items()):
        helpers.plot_a_scatter(xs,
                               attacker_means,
                               idx=plot_idx,
                               axis_to_plot_on=ax,
                               label=attacker_type)

    helpers.xlabel(x_axis_label)
    helpers.xlabel("Delay to hop period ratio")
    helpers.ylabel(r"\% of recovered messages")
    helpers.save_figure("attacker-simulation.pdf", num_cols=3)