def generate_mean_link_utilization_over_time_plot(parameter_name, parameter_value, trials):
    """
    Scatter-plot the mean normalized link utilization per snapshot for each trial.

    For every trial, the "link-utilization-over-time" parameter is converted
    with link_tuple_to_util_list and filtered down to the links whose first
    tuple element is "of:0000000000000001". Each sample is clamped to the path
    capacity (50.0) and divided by it, then the samples of all selected links
    are averaged per snapshot index. All trials are drawn on one figure, which
    is saved as "mean-utilization-over-time-<parameter_name>-<parameter_value>.pdf".

    Args:
        parameter_name: Name of the varied parameter; used in the file name only.
        parameter_value: Value of the varied parameter; used in the file name only.
        trials: Iterable of trial objects exposing `name` and `get_parameter`.
    """
    path_capacity = 50.0
    source_switch_id = "of:0000000000000001"

    for trial_idx, the_trial in enumerate(trials):
        print(f"generate_mean_utilization_over_time_plot: {trial_idx}, {the_trial.name}")
        raw_utilization = the_trial.get_parameter("link-utilization-over-time")

        # Keep only the links that originate at the selected switch.
        data_for_links = {}
        for link_tuple, util_list in link_tuple_to_util_list(raw_utilization).items():
            if link_tuple[0] == source_switch_id:
                data_for_links[link_tuple] = util_list

        # Clamp every sample to the capacity and express it as a fraction of it.
        normalized = {}
        for link_tuple, samples in data_for_links.items():
            normalized[link_tuple] = [min(path_capacity, sample) / path_capacity
                                      for sample in samples]

        # The snapshot count is taken from the first link only, so every link
        # is assumed to have the same number of snapshots; indexing raises if a
        # link has fewer.
        snapshot_count = len(next(iter(data_for_links.values())))
        mean_utilization = []
        for time_idx in range(snapshot_count):
            samples_at_time = [series[time_idx] for series in normalized.values()]
            mean_utilization.append(np.mean(samples_at_time))

        xs = list(range(snapshot_count))
        helpers.plot_a_scatter(xs, mean_utilization, idx=trial_idx, label=the_trial.name)

    helpers.xlabel(helpers.axis_label_font("Time"))
    helpers.ylabel(helpers.axis_label_font("Mean link utilization"))
    helpers.save_figure(f"mean-utilization-over-time-{parameter_name}-{parameter_value}.pdf",
                        num_cols=3)
def generate_learning_rate_plots():
    """
    Plot the "step" and "inv" learning-rate schedules against the iteration.

    Schedule formulas:
        - step: base_lr * gamma ^ (floor(iter / step))
        - exp:  base_lr * gamma ^ iter                  (listed for reference, not plotted)
        - inv:  base_lr * (1 + gamma * iter) ^ (- power)

    The schedules are sampled at iterations 1, 11, 21, ... up to 50000 and the
    figure is saved as "learning-rate.pdf" under figure_output_dir.
    """
    base_lr = 0.01
    gamma_step = 0.9999
    gamma_inv = 0.0001
    step = 1
    power = 0.75

    def inv_schedule(iteration):
        return base_lr * (1 + gamma_inv * iteration)**(-power)

    def step_schedule(iteration):
        return base_lr * (gamma_step**(floor(iteration / step)))

    xs = list(range(1, 50001, 10))
    inv_learning_rate = list(map(inv_schedule, xs))
    step_learning_rate = list(map(step_schedule, xs))

    helpers.plot_a_line(xs, step_learning_rate, label="Step", idx=6, plot_markers=False)
    helpers.plot_a_line(xs, inv_learning_rate, label="Inverse", idx=7, plot_markers=False)
    helpers.xlabel("Training Iteration")
    helpers.ylabel("Learning Rate")
    helpers.save_figure(figure_output_dir / "learning-rate.pdf", num_cols=2)
def generate_mean_throughput_over_time_plot(parameter_name, parameter_value, trials):
    """
    Scatter-plot the throughput per snapshot for each trial.

    For every trial, the "link-utilization-over-time" parameter is converted
    with link_tuple_to_util_list and filtered down to the links whose first
    tuple element is "of:0000000000000001". Each sample is clamped to the path
    capacity (50.0) and the clamped samples of all selected links are summed
    per snapshot index. All trials share one figure, saved as
    "throughput-over-time-<parameter_name>-<parameter_value>.pdf".

    NOTE(review): the y-axis label reads "Mean throughput" while the plotted
    value is the sum across the selected links -- confirm which is intended.

    Args:
        parameter_name: Name of the varied parameter; used in the file name only.
        parameter_value: Value of the varied parameter; used in the file name only.
        trials: Iterable of trial objects exposing `name` and `get_parameter`.
    """
    path_capacity = 50.0
    source_switch_id = "of:0000000000000001"

    for trial_idx, the_trial in enumerate(trials):
        print(f"generate_mean_throughput_over_time: {trial_idx}, {the_trial.name}")
        raw_utilization = the_trial.get_parameter("link-utilization-over-time")

        # Keep only the links that originate at the selected switch.
        data_for_links = {}
        for link_tuple, util_list in link_tuple_to_util_list(raw_utilization).items():
            if link_tuple[0] == source_switch_id:
                data_for_links[link_tuple] = util_list

        # Clamp every sample so that no link exceeds the path capacity.
        clamped = {}
        for link_tuple, samples in data_for_links.items():
            clamped[link_tuple] = [min(path_capacity, sample) for sample in samples]

        # The snapshot count is taken from the first link only.
        snapshot_count = len(next(iter(data_for_links.values())))
        throughputs_over_time = [
            sum(series[time_idx] for series in clamped.values())
            for time_idx in range(snapshot_count)
        ]

        xs = list(range(snapshot_count))
        helpers.plot_a_scatter(xs, throughputs_over_time, idx=trial_idx,
                               label=the_trial.name)

    helpers.xlabel(helpers.axis_label_font("Time"))
    helpers.ylabel(helpers.axis_label_font("Mean throughput (Mi-bps)"))
    helpers.save_figure(f"throughput-over-time-{parameter_name}-{parameter_value}.pdf",
                        num_cols=3)
def generate_per_class_accuracy_bar_plot():
    """
    Draw a two-panel figure: class distribution (top) and per-class accuracy (bottom).

    The bottom panel shows, for each of the 7 classes, the diagonal entry of a
    confusion matrix as a percentage of sum(get_samples_that_are(matrix, class)),
    once for the module-level `more_dropout` matrix and once for `undersampled`.
    The top panel shows the relative frequency of each segment type found in
    "./segments-undersampled.json". The figure is saved as
    "per-class-error-bar-plot.pdf" under figure_output_dir.
    """
    class_count = 7

    def per_class_accuracy(conf_mat):
        # Diagonal entry as a percentage of the samples reported for that class
        # (get_samples_that_are is called with 1-based class labels).
        return [
            100 * (conf_mat[cls][cls] / sum(get_samples_that_are(conf_mat, cls + 1)))
            for cls in range(class_count)
        ]

    def identity(value):
        return value

    augmented_conf_mat = more_dropout
    undersampled_conf_mat = undersampled

    fig, (ax1, ax2) = plt.subplots(2)

    # Two bars per class, placed side by side.
    bar_width = 0.1
    bar_1_xs = np.arange(0.0, 0.3 * 6.5, 0.3)
    bar_2_xs = [left_edge + bar_width for left_edge in bar_1_xs]
    print(bar_1_xs, bar_2_xs)

    bar_1_ys = per_class_accuracy(augmented_conf_mat)
    bar_2_ys = per_class_accuracy(undersampled_conf_mat)

    tick_positions = [left_edge + 0.5 * bar_width for left_edge in bar_1_xs]
    plt.xticks(tick_positions, [1, 2, 3, 4, 5, 6, "Outlier"])
    helpers.plot_a_bar(bar_1_xs, bar_1_ys, idx=0, label="Augmented Dataset",
                       bar_width=bar_width, axis_to_plot_on=ax2)
    helpers.plot_a_bar(bar_2_xs, bar_2_ys, idx=1, label="Undersampled Dataset",
                       bar_width=bar_width, axis_to_plot_on=ax2)
    helpers.xlabel("Class label")
    helpers.ylabel(r"Validation Accuracy (\%)", formatter=identity, ax=ax2)

    # Top panel: relative frequency of every segment type in the dataset file.
    dataset_text = path.Path("./segments-undersampled.json").read_text()
    undersampled_dataset = json.loads(dataset_text)
    histogram = Counter(
        entry["data"]["segment_type"]["data"]
        for entry in undersampled_dataset.values()
    )
    total_samples = sum(histogram.values())
    xs = list(range(1, 8))
    ys = [count / total_samples for _, count in sorted(histogram.items())]
    helpers.plot_a_bar(xs, ys, idx=1, axis_to_plot_on=ax1, label_data=False)
    helpers.ylabel(r"$\mathbb{P}\{x = \mathcal{X}\}$", formatter=identity, ax=ax1)
    ax1.set_yticks([0.1, 0.2, 0.3])
    ax1.grid(**cfg.GRID)
    ax1.xaxis.set_ticklabels([])

    # Work on a copy so the shared legend configuration is not modified.
    legend_params = deepcopy(cfg.LEGEND)
    legend_params["bbox_to_anchor"] = (0.5, 0.975)
    fig.legend(**legend_params, ncol=2)
    helpers.save_figure(figure_output_dir / "per-class-error-bar-plot.pdf",
                        no_legend=True)
def augment_vs_no_augment():
    """
    Compare validation error with and without dataset augmentation.

    Parses "training-with-flip-dataset.log" and "with-dropout.log" from
    log_file_dir, plots 100 * (1 - accuracy) of the second value returned by
    parse_log for each, and saves "dataset-augmentation-comparison.pdf" under
    figure_output_dir. The x values come from the augmented run's "NumIters".
    """
    def validation_error(records):
        return [100 * (1 - record["accuracy"]) for record in records]

    augment_log = log_file_dir / "training-with-flip-dataset.log"
    baseline_log = log_file_dir / "with-dropout.log"

    # The first value returned by parse_log is not used by this plot.
    _, augment_records = parse_log(str(augment_log))
    _, baseline_records = parse_log(str(baseline_log))

    xs = [record["NumIters"] for record in augment_records]
    augment_ys = validation_error(augment_records)
    no_augment_ys = validation_error(baseline_records)

    helpers.plot_a_line(xs, augment_ys, label="Dataset augmentations", idx=4,
                        plot_markers=False)
    helpers.plot_a_line(xs, no_augment_ys, label="Original dataset", idx=5,
                        plot_markers=False)
    plt.ylim(7.5, 20.0)

    helpers.xlabel("Training Iteration")
    helpers.ylabel(r"Validation Error (\%)")
    helpers.save_figure(str(figure_output_dir / "dataset-augmentation-comparison.pdf"),
                        num_cols=2)
def dropout_vs_no_dropout_plot():
    """
    Compare validation error for three dropout settings.

    Parses "with-dropout.log", "no-dropout.log" and "dropout-0.75.log" from
    log_file_dir, plots 100 * (1 - accuracy) of the second value returned by
    parse_log for each, and saves "dropout-comparison.pdf" under
    figure_output_dir. The x values come from the "with-dropout" run's "NumIters".
    """
    def validation_error(records):
        return [100 * (1 - record["accuracy"]) for record in records]

    _, dropout_records = parse_log(str(log_file_dir / "with-dropout.log"))
    _, no_dropout_records = parse_log(str(log_file_dir / "no-dropout.log"))
    _, more_dropout_records = parse_log(str(log_file_dir / "dropout-0.75.log"))

    xs = [record["NumIters"] for record in dropout_records]
    dropout_ys = validation_error(dropout_records)
    no_dropout_ys = validation_error(no_dropout_records)
    more_dropout_ys = validation_error(more_dropout_records)

    helpers.plot_a_line(xs, no_dropout_ys,
                        label=r"$\mathbb{P}\{\text{dropout}\} = 0.0$",
                        idx=3, plot_markers=False)
    helpers.plot_a_line(xs, dropout_ys,
                        label=r"$\mathbb{P}\{\text{dropout}\} = 0.5$",
                        idx=2, plot_markers=False)
    helpers.plot_a_line(xs, more_dropout_ys,
                        label=r"$\mathbb{P}\{\text{dropout}\} = 0.75$",
                        idx=4, plot_markers=False)

    helpers.ylim((5, 30))
    helpers.xlabel("Training Iteration")
    helpers.ylabel(r"Validation Error (\%)")
    helpers.save_figure(str(figure_output_dir / "dropout-comparison.pdf"), num_cols=2)
def generate_learning_rate_comparison_plot():
    """
    Draw a two-panel comparison of the "step" and "inverse" learning-rate policies.

    Top panel: the two learning-rate schedules sampled at iterations
    1, 11, 21, ... up to 50000:
        - step: base_lr * gamma ^ (floor(iter / step))
        - inv:  base_lr * (1 + gamma * iter) ^ (- power)
    Bottom panel: validation error, 100 * (1 - accuracy), parsed from
    "lr-step-training.log" (step) and "training-with-flip-dataset.log"
    (inverse) in log_file_dir. The figure is saved as
    "learning-rate-comparison.pdf" under figure_output_dir.

    In both panels idx=6 is the step policy and idx=7 is the inverse policy,
    so a single figure-level legend describes both panels.
    """
    _, validation_data_step = parse_log(
        str(log_file_dir / "lr-step-training.log"))
    _, validation_data_inv = parse_log(
        str(log_file_dir / "training-with-flip-dataset.log"))

    fig, (plot1, plot2) = plt.subplots(2)

    # Bottom panel: validation error of both training runs. The x values are
    # taken from the step run and reused for the inverse run.
    xs = [d_i["NumIters"] for d_i in validation_data_step]
    ys_step = [100 * (1 - d_i["accuracy"]) for d_i in validation_data_step]
    ys_inv = [100 * (1 - d_i["accuracy"]) for d_i in validation_data_inv]
    plot2.set_ylim(7.5, 20)
    helpers.xlabel("Training Iterations", ax=plot2)
    helpers.ylabel(r"Validation Error (\%)", ax=plot2, formatter=lambda x: x)
    # Each series is paired with its own label; previously the step data was
    # labelled "Inverse" and vice versa.
    helpers.plot_a_line(xs, ys_step, label="Step", plot_markers=False, idx=6,
                        axis_to_plot_on=plot2)
    helpers.plot_a_line(xs, ys_inv, label="Inverse", plot_markers=False, idx=7,
                        axis_to_plot_on=plot2)

    # Top panel: the learning-rate schedules themselves.
    base_lr = 0.01
    gamma_step = 0.9999
    gamma_inv = 0.0001
    step = 1
    power = 0.75
    xs = list(range(1, 50001, 10))
    inv_learning_rate = [
        base_lr * (1 + gamma_inv * iteration)**(-power) for iteration in xs
    ]
    step_learning_rate = [
        base_lr * (gamma_step**(floor(iteration / step))) for iteration in xs
    ]
    helpers.plot_a_line(xs, step_learning_rate, idx=6, plot_markers=False,
                        axis_to_plot_on=plot1)
    helpers.plot_a_line(xs, inv_learning_rate, idx=7, plot_markers=False,
                        axis_to_plot_on=plot1)
    plot1.xaxis.set_ticklabels([])
    plot1.grid(**cfg.GRID)
    helpers.ylabel("Learning Rate", ax=plot1, formatter=lambda x: x)

    # Work on a copy so the shared legend configuration is not modified.
    legend_params = deepcopy(cfg.LEGEND)
    legend_params["bbox_to_anchor"] = (0.5, 0.975)
    fig.legend(ncol=2, **legend_params)
    helpers.save_figure(figure_output_dir / "learning-rate-comparison.pdf",
                        no_legend=True)
def generate_dataset_histogram(dataset, output_path):
    """
    Plot the relative frequency of each segment type in `dataset`.

    Args:
        dataset: Mapping whose values expose ["data"]["segment_type"]["data"].
        output_path: Destination passed through to helpers.save_figure.

    The frequencies are ordered by sorted segment type and drawn at x positions
    1..7, with the tick labels 1-6 and "Outlier".
    """
    histogram = Counter(
        entry["data"]["segment_type"]["data"] for entry in dataset.values()
    )
    total_samples = sum(histogram.values())

    xs = list(range(1, 8))
    ys = [count / total_samples for _, count in sorted(histogram.items())]

    plt.xticks(xs, [1, 2, 3, 4, 5, 6, "Outlier"])
    helpers.ylabel(r"$\mathbb{P}\{x = \mathcal{X}\}$")
    helpers.xlabel("Class Label")
    helpers.plot_a_bar(xs, ys, idx=1)
    helpers.save_figure(output_path, no_legend=True)
def generate_step_vs_inv_learning_rate():
    """
    Plot validation error for the "step" and "inverse" learning-rate policies.

    Parses "lr-step-training.log" (step) and "training-with-flip-dataset.log"
    (inverse) from log_file_dir, plots 100 * (1 - accuracy) of the second value
    returned by parse_log for each, and saves "learning-rate-comparison.pdf"
    under figure_output_dir. The x values come from the step run's "NumIters".
    """
    _, validation_data_step = parse_log(
        str(log_file_dir / "lr-step-training.log"))
    _, validation_data_inv = parse_log(
        str(log_file_dir / "training-with-flip-dataset.log"))

    xs = [d_i["NumIters"] for d_i in validation_data_step]
    # The error series must be derived here: they were previously referenced
    # without ever being defined inside this function.
    ys_step = [100 * (1 - d_i["accuracy"]) for d_i in validation_data_step]
    ys_inv = [100 * (1 - d_i["accuracy"]) for d_i in validation_data_inv]

    helpers.plot_a_line(xs, ys_step, label="Step", plot_markers=False, idx=6)
    helpers.plot_a_line(xs, ys_inv, label="Inverse", plot_markers=False, idx=7)
    plt.ylim(7.5, 20)
    helpers.xlabel("Training Iterations")
    helpers.ylabel(r"Validation Error (\%)")
    helpers.save_figure(figure_output_dir / "learning-rate-comparison.pdf",
                        num_cols=2)
def generate_link_utilization_cdf(parameter_name, parameter_value, trials):
    """
    Plot, for every trial, a CDF of the per-link mean utilization.

    Each value of the trial's "measured-link-utilization" parameter is divided
    by the link capacity (50.0) and the sorted fractions are handed to
    helpers.plot_a_cdf. All trials share one figure, saved as
    "link-utilization-cdf-<parameter_name>-<parameter_value>.pdf".

    Args:
        parameter_name: Name of the varied parameter; used in the file name only.
        parameter_value: Value of the varied parameter; used in the file name only.
        trials: Sized iterable of trial objects exposing `name` and
            `get_parameter` (its length determines the legend column count).
    """
    link_capacity = 50.0  # Mi-bps

    for idx, trial in enumerate(trials):
        print(f"generate_link_utilization_cdf: {idx}, {trial.name}")
        mean_network_utilization = trial.get_parameter("measured-link-utilization")
        link_utilizations = sorted(
            link_throughput / link_capacity
            for link_throughput in mean_network_utilization.values()
        )
        helpers.plot_a_cdf(link_utilizations, label=trial.name, idx=idx)

    helpers.xlabel(helpers.axis_label_font("Link Utilization"))
    helpers.ylabel(helpers.axis_label_font(r"$\mathbb{P}\{x < \mathcal{X}\}$"))
    # Half as many legend columns as trials, but never fewer than one: a
    # single trial would otherwise request zero columns.
    plt.legend(ncol=max(1, len(trials) // 2), **cfg.LEGEND)
    helpers.save_figure(f"link-utilization-cdf-{parameter_name}-{parameter_value}.pdf",
                        no_legend=True)
def generate_per_path_packet_loss_cdf(parameter_name, parameter_value, trials):
    """
    Plot, for every trial, a CDF of the packet loss rate per flow id.

    The loss rate of a flow id is
    (packets sent - packets received) / packets sent, where the sent count sums
    "pkt_count" over all sender entries with that flow id and the received
    count sums the receiver entries whose key's second element equals each
    sender entry's "src_port". All trials share one figure, saved as
    "per-path-loss-cdf-<parameter_name>-<parameter_value>.pdf".

    Args:
        parameter_name: Name of the varied parameter; used in the file name only.
        parameter_value: Value of the varied parameter; used in the file name only.
        trials: Iterable of trial objects exposing `name` and `get_parameter`.
    """
    def flow_id_of(sender_entry):
        return sender_entry["flow_id"]

    for trial_idx, the_trial in enumerate(trials):
        print(f"generate_per_packet_loss_cdf: {trial_idx}, {the_trial.name}")
        end_host_results = the_trial.get_parameter("end-host-results")
        sender_results = end_host_results[0]["sender"]
        receiver_results = end_host_results[1]["receiver"]

        # groupby only merges adjacent items, so sort by the grouping key first.
        flows_sorted_by_id = sorted(sender_results.values(), key=flow_id_of)

        link_loss_rates = []
        for _, flows_with_id in itertools.groupby(flows_sorted_by_id, flow_id_of):
            packets_sent = 0
            packets_received = 0
            for the_flow in flows_with_id:
                source_port = the_flow["src_port"]
                packets_sent += the_flow["pkt_count"]
                packets_received += sum(
                    packet_count
                    for receiver_info, packet_count in receiver_results.items()
                    if receiver_info[1] == source_port
                )
            link_loss_rates.append((packets_sent - packets_received) / packets_sent)

        helpers.plot_a_cdf(sorted(link_loss_rates), idx=trial_idx, label=the_trial.name)

    helpers.xlabel(helpers.axis_label_font("Packet Loss Rate"))
    helpers.ylabel(helpers.axis_label_font(r"$\mathbb{P}\{x \leq \mathcal{X}\}$"))
    helpers.save_figure(f"per-path-loss-cdf-{parameter_name}-{parameter_value}.pdf",
                        num_cols=3)
def generate_learning_curve(training_data, validation_data, plot_name):
    """
    Plot training loss and validation accuracy against the iteration count.

    The training loss is drawn on the left y axis and the validation accuracy
    (as a percentage) on a twin right y axis limited to 82-93.

    Args:
        training_data: Sequence of records with "NumIters" and "loss".
        validation_data: Sequence of records with "NumIters" and "accuracy".
        plot_name: Destination passed through to helpers.save_figure.
    """
    train_iterations = [record["NumIters"] for record in training_data]
    train_loss = [record["loss"] for record in training_data]

    fig, ax1 = plt.subplots()
    first_line = helpers.plot_a_line(train_iterations, train_loss, idx=0,
                                     label="training log loss",
                                     plot_markers=False, axis_to_plot_on=ax1)
    ax1.set_yticks(list(np.arange(0.0, 2.6, 0.5)))
    helpers.xlabel("Iteration", ax=ax1)
    helpers.ylabel("Training loss", ax=ax1)

    # Second y axis sharing the same x axis for the accuracy curve.
    ax2 = ax1.twinx()
    helpers.ylabel(r"Validation accuracy (\%)", ax=ax2)
    validation_iterations = [record["NumIters"] for record in validation_data]
    validation_accuracy = [record["accuracy"] * 100 for record in validation_data]
    second_line = helpers.plot_a_line(validation_iterations, validation_accuracy,
                                      idx=1, label="validation accuracy",
                                      plot_markers=False, axis_to_plot_on=ax2)
    ax2.set_ylim((82, 93))
    ax2.set_yticks(list(range(82, 94, 1)))

    # One combined legend, built from the handles returned for both curves.
    ax1.legend(first_line + second_line,
               ["training log loss", "validation accuracy"],
               ncol=2, **cfg.LEGEND)
    helpers.save_figure(plot_name, num_cols=2, no_legend=True)
def generate_data_recovery_vs_param_plot(trial_provider, param_name, x_axis_label):
    """
    Plot the mean percentage of recovered messages per attacker type.

    Trials are grouped by the value of `param_name`. For every trial and
    attacker type, the percentage is
    len("<attacker>-attacker-recovered-messages") / "sim_duration" * 100; the
    mean over each group is plotted at x = parameter value / 5.0. The figure is
    saved as "attacker-simulation.pdf".

    Args:
        trial_provider: Iterable of trial objects exposing `get_parameter`.
        param_name: Name of the trial parameter to group by.
        x_axis_label: Label passed to the first helpers.xlabel call.
    """
    def param_of(trial):
        return trial.get_parameter(param_name)

    # groupby only merges adjacent items, so sort by the grouping key first.
    sorted_trials = sorted(trial_provider, key=param_of)

    attacker_types = [
        "random-path-hopping",
        "random-node-hopping",
        # "ideal-random-path-hopping" is currently disabled.
        "one-node-per-path",
        "fixed",
        "planned",
    ]

    xs = []
    means = defaultdict(list)
    errs = defaultdict(list)
    attacker_data = {}
    fig, ax = plt.subplots()

    for param_value, param_group in itertools.groupby(sorted_trials, key=param_of):
        group_trials = list(param_group)
        recovered_percentages = defaultdict(list)
        for trial in group_trials:
            total_messages_sent = trial.get_parameter("sim_duration")
            for attacker_type in attacker_types:
                attacker_data[attacker_type] = trial.get_parameter(
                    f"{attacker_type}-attacker-recovered-messages")
            for attacker_type, recovered_messages in attacker_data.items():
                print(
                    f"{attacker_type} recoverd {len(recovered_messages)} out of {total_messages_sent} messages"
                )
                recovered_percentages[attacker_type].append(
                    (len(recovered_messages) / total_messages_sent) * 100)

        xs.append(param_value / 5.0)
        for attacker_type, y_vals in recovered_percentages.items():
            means[attacker_type].append(np.mean(y_vals))
            # The standard deviations are collected but not drawn.
            errs[attacker_type].append(np.std(y_vals))

    for plot_idx, (attacker_type, mean_series) in enumerate(means.items()):
        helpers.plot_a_scatter(xs, mean_series, idx=plot_idx, axis_to_plot_on=ax,
                               label=attacker_type)

    helpers.xlabel(x_axis_label)
    # NOTE(review): this second call presumably replaces the label set from
    # x_axis_label just above -- confirm whether the fixed text is intended.
    helpers.xlabel("Delay to hop period ratio")
    helpers.ylabel(r"\% of recovered messages")
    helpers.save_figure("attacker-simulation.pdf", num_cols=3)