def generate_mean_link_utilization_over_time_plot(parameter_name, parameter_value, trials):
    """
    Generate a graph that shows the mean utilization across all the links
    over time for each trial in the trial provider.
    """
    path_capacity = 50.0
    for trial_idx, the_trial in enumerate(trials):
        print(f"generate_mean_link_utilization_over_time_plot: {trial_idx}, {the_trial.name}")
        link_utilization_over_time = the_trial.get_parameter("link-utilization-over-time")
        data_for_links = {link_tuple: util_list
                          for link_tuple, util_list
                          in link_tuple_to_util_list(link_utilization_over_time).items()
                          if link_tuple[0] == "of:0000000000000001"}
        ys = {link_tuple: [min(path_capacity, util_val) / path_capacity
                           for util_val in util_val_list]
              for link_tuple, util_val_list in data_for_links.items()}
        # The next lines assume that the same number of network snapshots was
        # captured for each of the links. That should always be the case, but
        # an IndexError will be raised if it is not.
        number_of_snapshots = len(next(iter(data_for_links.values())))
        mean_utilization_over_time = [np.mean([util_list[time_idx] for util_list in ys.values()])
                                      for time_idx in range(number_of_snapshots)]
        xs = list(range(number_of_snapshots))
        helpers.plot_a_scatter(xs, mean_utilization_over_time, idx=trial_idx,
                               label=the_trial.name)

    helpers.xlabel(helpers.axis_label_font("Time"))
    helpers.ylabel(helpers.axis_label_font("Mean link utilization"))
    helpers.save_figure(f"mean-utilization-over-time-{parameter_name}-{parameter_value}.pdf",
                        num_cols=3)

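# link_tuple_to_util_list (used above and in generate_mean_throughput_over_time_plot
# below) is defined elsewhere in the project. The plotting code only depends on
# it returning a mapping from link tuples to per-snapshot utilization lists; a
# sketch of that shape, assuming "link-utilization-over-time" is a list of
# per-snapshot {link-tuple: utilization} dicts (an assumption, not the
# confirmed format):
def _link_tuple_to_util_list_sketch(link_utilization_over_time):
    util_lists = defaultdict(list)
    for snapshot in link_utilization_over_time:
        for link_tuple, util_val in snapshot.items():
            util_lists[link_tuple].append(util_val)
    return util_lists
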
def generate_mean_throughput_over_time_plot(parameter_name, parameter_value, trials):
    """
    Generate a graph that shows the throughput summed across all the monitored
    links over time for each trial in the trial provider.
    """
    path_capacity = 50.0
    for trial_idx, the_trial in enumerate(trials):
        print(f"generate_mean_throughput_over_time_plot: {trial_idx}, {the_trial.name}")
        link_utilization_over_time = the_trial.get_parameter("link-utilization-over-time")
        data_for_links = {link_tuple: util_list
                          for link_tuple, util_list
                          in link_tuple_to_util_list(link_utilization_over_time).items()
                          if link_tuple[0] == "of:0000000000000001"}
        ys = {link_tuple: [min(path_capacity, util_val) for util_val in util_val_list]
              for link_tuple, util_val_list in data_for_links.items()}
        throughputs_over_time = []
        for time_idx in range(len(next(iter(data_for_links.values())))):
            total_throughput = sum(util_list[time_idx] for util_list in ys.values())
            throughputs_over_time.append(total_throughput)

        xs = list(range(len(next(iter(data_for_links.values())))))
        helpers.plot_a_scatter(xs, throughputs_over_time, idx=trial_idx, label=the_trial.name)

    helpers.xlabel(helpers.axis_label_font("Time"))
    # Note: the plotted values are totals summed over the monitored links,
    # not per-link means.
    helpers.ylabel(helpers.axis_label_font("Total throughput (Mi-bps)"))
    helpers.save_figure(f"throughput-over-time-{parameter_name}-{parameter_value}.pdf",
                        num_cols=3)

def generate_computed_link_utilization_box_plot(trial_provider):
    grouped_by_name = collect_trials_based_on_name(trial_provider)
    labels = [helpers.axis_label_font(multiflow_label(group[0].name))
              for group in grouped_by_name]
    # Make lists of link utilization data, one flat list per trial group.
    link_utilization_data = [reduce(op.add,
                                    [list(t_i.get_parameter("measured-link-utilization").values())
                                     for t_i in group
                                     if t_i.has_parameter("measured-link-utilization")],
                                    [])
                             for group in grouped_by_name]
    plot_a_box_plot(link_utilization_data, labels)
    plt.ylabel(helpers.axis_label_font("Link utilization"))
    node_selection_type = trial_provider.get_metadata("node-selection-type")
    helpers.save_figure("computed-link-utilization-%s-box.pdf" % node_selection_type,
                        no_legend=True)

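# collect_trials_based_on_name is defined elsewhere; the plotting functions
# here only rely on it yielding lists of trials grouped by their shared name.
# A minimal sketch of that contract, assuming trials expose a .name attribute:
def _collect_trials_based_on_name_sketch(trial_provider):
    name_selector = lambda t_i: t_i.name
    # groupby only groups consecutive items, so sort by name first.
    sorted_trials = sorted(trial_provider, key=name_selector)
    return [list(group) for _, group in itertools.groupby(sorted_trials, name_selector)]
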
def generate_attacker_vs_k_scatter():
    def attacker_setting(trial_dict):
        return trial_dict["experiment"]["attacker_setting"]

    def attacker_timestep(trial_dict):
        return trial_dict["experiment"]["attacker_timestep"]

    results_file = MININET_RESULTS_DIR / "results_attacker_k" / "results.log"
    results_dicts = [eval(s_i) for s_i in results_file.read_text().splitlines()]
    print(len(results_dicts))
    fixed_attacker_trials = [d_i for d_i in results_dicts
                             if attacker_setting(d_i) == "fixed"]
    unsynced_attacker_trials = [d_i for d_i in results_dicts
                                if attacker_setting(d_i) == "hop_independent"]
    synced_attacker_trials = [d_i for d_i in results_dicts
                              if attacker_setting(d_i) == "hop_sync"]
    trial_data_list = [fixed_attacker_trials, unsynced_attacker_trials, synced_attacker_trials]
    trial_names = ["Fixed", "Independent", "Synced"]
    k_selector = lambda d_i: d_i["experiment"]["k"]
    for plot_idx, (trial_name, trial_data) in enumerate(zip(trial_names, trial_data_list)):
        scatter_points = compute_mean_data_recovery(trial_data, k_selector)
        xs = [t_i[0] for t_i in scatter_points]
        ys = [t_i[1] / 1000 for t_i in scatter_points]  # bytes to kilobytes
        helpers.plot_a_scatter(xs, ys, idx=plot_idx, label=helpers.legend_font(trial_name))

    plt.xlabel(helpers.axis_label_font("$K$"))
    plt.ylabel(helpers.axis_label_font("Kilobytes"))
    helpers.save_figure("attacker.pdf", num_cols=len(trial_data_list))

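# The two results.log readers in this module (above, and in
# generate_goodput_vs_message_size_scatter below) use eval(), which executes
# arbitrary code from the log file. If the logged dictionaries contain only
# Python literals, ast.literal_eval is a safer drop-in. A minimal sketch under
# that assumption; parse_results_log is a hypothetical helper, not part of the
# existing code:
def parse_results_log(results_file):
    import ast
    return [ast.literal_eval(s_i) for s_i in results_file.read_text().splitlines()]
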
def generate_computed_link_utilization_cdf(trial_provider):
    grouped_by_name = collect_trials_based_on_name(trial_provider)
    labels = [multiflow_label(group[0].name) for group in grouped_by_name]
    link_utilizations = [reduce(op.add,
                                [list(t_i.get_parameter("measured-link-utilization").values())
                                 for t_i in group
                                 if t_i.has_parameter("measured-link-utilization")],
                                [])
                         for group in grouped_by_name]
    for plot_idx in range(len(labels)):
        sorted_link_utilization_data = sorted(link_utilizations[plot_idx])
        plot_a_cdf(sorted_link_utilization_data, idx=plot_idx,
                   label=helpers.legend_font(labels[plot_idx]))

    plt.xlabel(helpers.axis_label_font("Link Utilization"))
    plt.ylabel(helpers.axis_label_font(r"$\mathbb{P}\{x \leq \mathcal{X}\}$"))
    node_selection_type = trial_provider.get_metadata("node-selection-type")
    legend_kwargs = dict(cfg.LEGEND)
    helpers.save_figure("computed-link-utilization-%s-cdf.pdf" % node_selection_type,
                        num_cols=len(trial_provider), legend_kwargs=legend_kwargs)

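# plot_a_cdf / helpers.plot_a_cdf are defined elsewhere and receive pre-sorted
# data throughout this module. For reference, an empirical CDF can be plotted
# in a couple of lines; this sketches the presumed behaviour, not the actual
# helper implementation:
def _plot_an_empirical_cdf_sketch(sorted_data, **plot_kwargs):
    # The i-th smallest of n samples estimates P{x <= X} as (i + 1) / n.
    ys = [(i + 1) / len(sorted_data) for i in range(len(sorted_data))]
    plt.plot(sorted_data, ys, **plot_kwargs)
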
def generate_link_utilization_cdf(parameter_name, parameter_value, trials):
    """
    Generate a CDF that shows the mean utilization of each link for every
    trial in the provider.
    """
    link_capacity = 50.0  # Mi-bps
    for idx, trial in enumerate(trials):
        print(f"generate_link_utilization_cdf: {idx}, {trial.name}")
        utilization_results = trial.get_parameter("byte-counts-over-time")
        links = get_link_set(utilization_results)
        # print(f"Number of links based on utilization results: {len(links)}")
        mean_network_utilization = trial.get_parameter("measured-link-utilization")
        link_utilizations = sorted([link_throughput / link_capacity
                                    for link_throughput in mean_network_utilization.values()])
        helpers.plot_a_cdf(link_utilizations, label=trial.name, idx=idx)

    helpers.xlabel(helpers.axis_label_font("Link Utilization"))
    helpers.ylabel(helpers.axis_label_font(r"$\mathbb{P}\{x < \mathcal{X}\}$"))
    plt.legend(ncol=len(trials)//2, **cfg.LEGEND)
    helpers.save_figure(f"link-utilization-cdf-{parameter_name}-{parameter_value}.pdf",
                        no_legend=True)

def generate_per_path_packet_loss_cdf(parameter_name, parameter_value, trials):
    """
    For each trial, generate a CDF of total packet loss, i.e.
    (total packets sent - total packets received) / total packets sent.
    """
    for trial_idx, the_trial in enumerate(trials):
        print(f"generate_per_path_packet_loss_cdf: {trial_idx}, {the_trial.name}")
        end_host_results = the_trial.get_parameter("end-host-results")
        sender_results = end_host_results[0]["sender"]
        receiver_results = end_host_results[1]["receiver"]
        link_loss_rates = []
        flow_id_selector = lambda ss: ss["flow_id"]
        # groupby only groups consecutive records, so sort by flow ID first.
        sender_results = sorted(list(sender_results.values()), key=flow_id_selector)
        for flow_id, flows_with_id in itertools.groupby(sender_results, flow_id_selector):
            total_sender_packets_for_path = 0
            total_receiver_packets_for_path = 0
            for the_flow in flows_with_id:
                source_port = the_flow["src_port"]
                total_sender_packets_for_path += the_flow["pkt_count"]
                total_receiver_packets_for_path += sum([packet_count
                                                        for receiver_info, packet_count
                                                        in receiver_results.items()
                                                        if receiver_info[1] == source_port])
            link_loss_rate = (total_sender_packets_for_path - total_receiver_packets_for_path) \
                    / total_sender_packets_for_path
            link_loss_rates.append(link_loss_rate)

        helpers.plot_a_cdf(sorted(link_loss_rates), idx=trial_idx, label=the_trial.name)

    helpers.xlabel(helpers.axis_label_font("Packet Loss Rate"))
    helpers.ylabel(helpers.axis_label_font(r"$\mathbb{P}\{x \leq \mathcal{X}\}$"))
    helpers.save_figure(f"per-path-loss-cdf-{parameter_name}-{parameter_value}.pdf",
                        num_cols=3)

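# A quick worked example of the loss-rate formula above: if the sender flows
# for a path report 1000 packets sent in total and the receiver counted 990
# packets on the matching source ports, the path's loss rate is
# (1000 - 990) / 1000 = 0.01. Note that the division raises ZeroDivisionError
# for a path that sent no packets.
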
def plot_share_delay_cdf_across_ks(plot_name, packet_info_dir):
    """
    Generate share delay cumulative distribution function plots for a fixed
    set of K values.
    """
    k_to_packets = {}
    for k_value in range(3, 9):
        packets_file_path = packet_info_dir.joinpath(
                path.Path("packet-dump-info-%d.txt" % k_value))
        k_to_packets[k_value] = read_packet_dump_info_from_file(packets_file_path)

    for plot_idx, (k_value, packets) in enumerate(k_to_packets.items()):
        plot_share_delay_cdf(packets, label=helpers.legend_font(r"$K$=%d" % k_value),
                             idx=plot_idx)

    title_string = r"$n=10$, $\lambda=50$, $\delta=100ms$, $latency=\mathcal{U}(0ms, 250ms)$, $jitter=50ms$"
    # plt.title(title_string)
    plt.xlabel(helpers.axis_label_font(r"Inter-share delay ($ms$)"))
    plt.ylabel(helpers.axis_label_font(r"$\mathbb{P}\{x < \mathcal{X}\}$"))
    legend_kwargs = dict(cfg.LEGEND)
    legend_kwargs["loc"] = "upper center"
    legend_kwargs["bbox_to_anchor"] = (0.5, 1.2)
    helpers.save_figure("inter-share-latency-%s-cdf.pdf" % plot_name,
                        num_cols=(len(k_to_packets)//2), legend_kwargs=legend_kwargs)

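# read_packet_dump_info_from_file is defined elsewhere; the trace-processing
# functions below only rely on its records exposing timestamp (microseconds),
# seq_num, source_port, and destination_port. A minimal sketch of a compatible
# reader, assuming a hypothetical whitespace-separated dump format (the real
# format may differ):
def _read_packet_dump_info_sketch(packets_file_path):
    from collections import namedtuple
    PacketInfo = namedtuple("PacketInfo",
                            ["timestamp", "seq_num", "source_port", "destination_port"])
    records = []
    with open(packets_file_path) as dump_file:
        for line in dump_file:
            timestamp, seq_num, src_port, dst_port = line.split()
            records.append(PacketInfo(int(timestamp), int(seq_num),
                                      int(src_port), int(dst_port)))
    return records
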
def generate_goodput_vs_message_size_scatter():
    results_file = MININET_RESULTS_DIR / "results_msgsize" / "results.log"
    results_dicts = [eval(s_i) for s_i in results_file.read_text().splitlines()]
    host_hopping_trials = [d_i for d_i in results_dicts
                           if d_i["experiment"]["hop_method"] == "host"]
    net_hopping_trials = [d_i for d_i in results_dicts
                          if d_i["experiment"]["hop_method"] == "net"]
    msg_size_selector = lambda d_i: d_i["experiment"]["msg_size"]

    scatter_points = compute_mean_goodput(host_hopping_trials, msg_size_selector)
    xs = [t_i[0] for t_i in scatter_points]
    ys = [t_i[1] for t_i in scatter_points]
    helpers.plot_a_scatter(xs, ys, idx=0, label=helpers.legend_font("Host hopping"))

    scatter_points = compute_mean_goodput(net_hopping_trials, msg_size_selector)
    xs = [t_i[0] for t_i in scatter_points]
    ys = [t_i[1] for t_i in scatter_points]
    helpers.plot_a_scatter(xs, ys, idx=1, label=helpers.legend_font("Net hopping"))

    plt.xlabel(helpers.axis_label_font("Message size (Bytes)"))
    plt.ylabel(helpers.axis_label_font("Mbps"))
    helpers.save_figure("msg_goodput.pdf", num_cols=2)

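# compute_mean_goodput (like compute_mean_data_recovery above) is defined
# elsewhere; its call sites imply it returns sorted (key, mean-metric) pairs
# grouped by the selector. A sketch of that contract, where the
# d_i["results"]["goodput"] key is a hypothetical placeholder rather than the
# real result schema:
def _compute_mean_goodput_sketch(trial_dicts, key_selector):
    grouped = defaultdict(list)
    for d_i in trial_dicts:
        grouped[key_selector(d_i)].append(d_i["results"]["goodput"])  # hypothetical key
    return sorted((key, np.mean(values)) for key, values in grouped.items())
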
def generate_loss_vs_timestep_plot():
    host_hopping = [(100, 0.0), (250, 0.0), (500, 0.0), (1000, 0.0), (5000, 0.0),
                    (10000, 0.0)]
    net_hopping = [(100, 0.32), (250, 0.14), (500, 0.093), (1000, 0.025), (5000, 0.015),
                   (10000, 0.0)]
    trial_names = ["Host hopping", "Net hopping"]
    trial_data_lists = [host_hopping, net_hopping]
    for plot_idx, (trial_name, trial_data) in enumerate(zip(trial_names, trial_data_lists)):
        xs = [t_i[0] for t_i in trial_data]
        ys = [t_i[1] for t_i in trial_data]
        helpers.plot_a_scatter(xs, ys, idx=plot_idx, label=helpers.legend_font(trial_name))

    plt.xlim((0.0, 10000))
    plt.ylim((0.0, 0.35))
    plt.xlabel(helpers.axis_label_font("Timestep (ms)"))
    plt.ylabel(helpers.axis_label_font("Packet loss rate"))
    helpers.save_figure("loss-plot.pdf", num_cols=2)

def generate_flow_count_bar_plot(trial_provider):
    grouped_by_name = collect_trials_based_on_name(trial_provider)
    allocated_flows_data = [[len(t_i.get_parameter("flow-set")) for t_i in group]
                            for group in grouped_by_name]
    bar_heights = [np.mean(d_i) for d_i in allocated_flows_data]
    bar_errors = [np.std(d_i) for d_i in allocated_flows_data]
    labels = [helpers.axis_label_font(multiflow_label(group[0].name))
              for group in grouped_by_name]
    bar_x_locations = np.arange(len(grouped_by_name))
    for plot_idx in range(len(bar_x_locations)):
        plt.bar(bar_x_locations[plot_idx], bar_heights[plot_idx],
                color=helpers.bar_color(plot_idx), tick_label=labels[plot_idx],
                hatch=helpers.bar_texture(plot_idx), yerr=bar_errors[plot_idx],
                capsize=5.0)

    plt.ylabel(helpers.axis_label_font(r"\# of admitted flows"))
    plt.xticks(bar_x_locations, labels)
    node_selection_type = trial_provider.get_metadata("node-selection-type")
    helpers.save_figure("admitted-flows-%s-bar.pdf" % node_selection_type,
                        num_cols=len(trial_provider), no_legend=True)

def generate_number_of_time_periods_shares_were_active_pdf(set_of_traces_to_plot, trace_names):
    r"""
    Generate a plot of the probability density function of the number of
    $\delta$ ms time periods that shares for a particular sequence number were
    present in the network. A single PDF is generated and plotted for each of
    the traces in <set_of_traces_to_plot>.
    """
    bar_width = 0.35
    possible_x_values = set()
    for bar_idx, (trace_name, packets) in enumerate(zip(trace_names, set_of_traces_to_plot)):
        packets = sorted(packets, key=lambda p_i: p_i.timestamp)
        delta = 100 * 10**3  # interval length in microseconds (100 ms)
        interval_start = packets[0].timestamp
        current_time = packets[0].timestamp
        seq_num_to_list_of_intervals = defaultdict(list)
        interval_index = 0
        for p_i in packets:
            current_time = p_i.timestamp
            if (current_time - interval_start) > delta:
                interval_index += 1
                interval_start = current_time
            seq_num_to_list_of_intervals[p_i.seq_num].append(interval_index)

        seq_num_to_interval_count = {}
        for seq_num, list_of_intervals in seq_num_to_list_of_intervals.items():
            seq_num_to_interval_count[seq_num] = (max(list_of_intervals) -
                                                  min(list_of_intervals)) + 1

        counted_data = list(Counter(seq_num_to_interval_count.values()).items())
        hist_data_for_trace = sorted(counted_data, key=lambda kvp_i: kvp_i[0])
        possible_x_values = possible_x_values | set([t_i[0] for t_i in hist_data_for_trace])
        vector_sum = sum(t_i[1] for t_i in hist_data_for_trace)
        normed_hist_data_for_trace = [t_i[1] / vector_sum for t_i in hist_data_for_trace]
        bar_x_locations = [t_i[0] + (bar_width * bar_idx) for t_i in hist_data_for_trace]
        helpers.plot_a_bar(bar_x_locations, normed_hist_data_for_trace, idx=bar_idx,
                           bar_width=bar_width, label=helpers.legend_font(trace_name))

    x_tick_labels = list(sorted(possible_x_values))
    x_tick_locations = [x_i + ((bar_width/2) * (len(set_of_traces_to_plot)-1))
                        for x_i in x_tick_labels]
    plt.xticks(x_tick_locations, x_tick_labels)
    # plt.xlabel(r"Number of $\delta$ms intervals sequence shares were present in the network")
    plt.ylabel(helpers.axis_label_font(r"$\mathbb{P}\{x = \mathcal{X}\}$"))
    helpers.save_figure("share-presence-pdf.pdf", num_cols=len(set_of_traces_to_plot))

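# A small worked example of the interval bucketing above, with
# delta = 100 * 10**3 microseconds (100 ms): shares for one sequence number
# captured at timestamps 0, 90000, and 250000 microseconds fall into interval
# indices 0, 0, and 1 respectively, so that sequence number was present for
# (1 - 0) + 1 = 2 delta-length time periods.
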
def generate_active_paths_per_interval_plot(set_of_traces_to_plot, trace_names):
    r"""
    Generate a plot of the probability density function of the number of
    active paths per $\delta$ ms interval. A path is defined as being active
    if it is carrying shares for any sequence number.
    """
    bar_width = 0.4
    possible_x_values = set()
    for bar_idx, (trace_name, packets) in enumerate(zip(trace_names, set_of_traces_to_plot)):
        packets = sorted(packets, key=lambda p_i: p_i.timestamp)
        delta = 100 * 10**3  # microseconds
        interval_start = packets[0].timestamp
        current_time = packets[0].timestamp
        active_ports_in_interval = set()
        number_of_active_paths_per_interval = []
        for p_i in packets:
            current_time = p_i.timestamp
            if (current_time - interval_start) > delta:
                number_of_active_paths_per_interval.append(len(active_ports_in_interval))
                active_ports_in_interval = set()
                interval_start = current_time
            active_ports_in_interval.add((p_i.source_port, p_i.destination_port))

        counted_data = list(Counter(number_of_active_paths_per_interval).items())
        hist_data_for_trace = sorted(counted_data, key=lambda kvp_i: kvp_i[0])
        possible_x_values = possible_x_values | set([t_i[0] for t_i in hist_data_for_trace])
        vector_sum = sum([t_i[1] for t_i in hist_data_for_trace])
        normed_hist_data_for_trace = [t_i[1] / vector_sum for t_i in hist_data_for_trace]
        bar_x_locations = [t_i[0] + (bar_width * bar_idx) for t_i in hist_data_for_trace]
        helpers.plot_a_bar(bar_x_locations, normed_hist_data_for_trace, idx=bar_idx,
                           bar_width=bar_width, label=helpers.legend_font(trace_name))

    x_tick_labels = np.arange(min(possible_x_values), max(possible_x_values) + 1)
    x_tick_locations = [x_i + ((bar_width/2) * (len(set_of_traces_to_plot)-1))
                        for x_i in x_tick_labels]
    plt.xticks(x_tick_locations, x_tick_labels)
    plt.ylabel(helpers.axis_label_font(r"$\mathbb{P}\{x = \mathcal{X}\}$"))
    helpers.save_figure("active-paths-histogram.pdf", num_cols=len(set_of_traces_to_plot))

def generate_link_utilization_box_plot(parameter_name, parameter_value, trials):
    """
    Generate a box and whisker plot that shows the mean utilization of every
    link for every trial in the provider.
    """
    def plot_a_box_plot(data_vectors, vector_labels):
        bp = plt.boxplot(data_vectors, labels=vector_labels,
                         whiskerprops={"linestyle": "--"},
                         flierprops={"marker": "x", "markerfacecolor": "red",
                                     "markeredgecolor": "red"})
        plt.setp(bp["boxes"], color="blue")
        plt.setp(bp["medians"], color="red")

    box_plot_data = []
    labels = []
    for trial_idx, the_trial in enumerate(trials):
        print(f"generate_link_utilization_box_plot: {trial_idx}, {the_trial.name}")
        labels.append(helpers.axis_label_font(the_trial.name))
        mean_link_utilization = the_trial.get_parameter("measured-link-utilization")
        box_plot_data.append(list(mean_link_utilization.values()))

    plot_a_box_plot(box_plot_data, labels)
    helpers.save_figure(f"link-utilization-box-plot-{parameter_name}-{parameter_value}.pdf",
                        no_legend=True)

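# Note: newer matplotlib releases (3.9+) rename boxplot's `labels` keyword to
# `tick_labels`; the call above assumes an older matplotlib. A version-tolerant
# variant might fall back when `tick_labels` is not recognised:
#
#     try:
#         bp = plt.boxplot(data_vectors, tick_labels=vector_labels, ...)
#     except TypeError:
#         bp = plt.boxplot(data_vectors, labels=vector_labels, ...)
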
def plot_best_and_worst_paths_cdf(capture_name, packets, **kwargs):
    """
    Generate a plot showing the cumulative distribution function of the
    inter-share delay for the 5 best and 5 worst K-sized sets of paths in the
    network. In this context, the terms best and worst refer to the K-sized
    sets of paths with the smallest and largest inter-share delay,
    respectively.
    """
    def configure_plot(figure):
        pass

    # Group the packets by sequence number, then key each group by its set of
    # destination ports.
    key_fn = lambda p_i: p_i.seq_num
    packets = sorted(packets, key=key_fn)
    groups = itertools.groupby(packets, key_fn)
    port_group_to_isd = defaultdict(list)
    for seq_num, packets_with_seq_num in groups:
        packet_group = list(packets_with_seq_num)
        port_key = tuple(sorted([p_i.destination_port for p_i in packet_group]))
        ts_selector = lambda p_i: p_i.timestamp
        inter_share_delay = ts_selector(max(packet_group, key=ts_selector)) - \
                ts_selector(min(packet_group, key=ts_selector))
        port_group_to_isd[port_key].append(inter_share_delay / 1000)

    def compute_95th_percentile(data):
        data = sorted(data)
        idx = int(np.floor(0.95 * len(data)))
        return data[idx]

    fig, axs = plt.subplots(2, sharex=True)
    best_paths_axis = axs[0]
    worst_paths_axis = axs[1]
    large_bold = lambda phrase: helpers.LARGE(helpers.bf(phrase))
    best_paths_axis.text(0, 0.90, large_bold("Best paths"))
    worst_paths_axis.text(0, 0.90, large_bold("Worst paths"))

    sorted_paths = sorted([(port_key, inter_share_delays)
                           for port_key, inter_share_delays in port_group_to_isd.items()],
                          key=lambda t_i: np.mean(t_i[1]))
    best_paths = sorted_paths[:5]
    worst_paths = sorted_paths[len(sorted_paths)-5:]

    for plot_idx, (port_group, inter_share_delays) in enumerate(best_paths):
        helpers.plot_a_cdf(sorted(inter_share_delays), idx=plot_idx, plot_markers=False,
                           axis_to_plot_on=best_paths_axis, label_data=False)

    percentile_label = helpers.legend_font(r"$95$th-Percentile")
    all_latencies = list(itertools.chain(*[b_i[1] for b_i in best_paths]))
    percentile_95 = compute_95th_percentile(all_latencies)
    best_paths_axis.axvline(percentile_95, color="red", linestyle="--",
                            label=percentile_label)

    legend_params = dict(cfg.LEGEND)
    legend_params["loc"] = "upper center"
    legend_params["bbox_to_anchor"] = (0.5, 1.3)
    best_paths_axis.legend(ncol=1, **legend_params)

    for plot_idx, (port_group, inter_share_delays) in enumerate(worst_paths):
        helpers.plot_a_cdf(sorted(inter_share_delays), idx=plot_idx, plot_markers=False,
                           axis_to_plot_on=worst_paths_axis, label_data=False)

    all_latencies = list(itertools.chain(*[w_i[1] for w_i in worst_paths]))
    percentile_95 = compute_95th_percentile(all_latencies)
    worst_paths_axis.axvline(percentile_95, color="red", linestyle="--",
                             label=percentile_label)

    y_label_str = helpers.axis_label_font(r"$\mathbb{P}\{x < \mathcal{X}\}$")
    worst_paths_axis.set_xlabel(helpers.axis_label_font(r"Inter-Share Delay ($ms$)"))
    worst_paths_axis.set_ylabel(y_label_str)
    best_paths_axis.set_ylabel(y_label_str)
    configure_plot(fig)
    axis_to_plot = [best_paths_axis, worst_paths_axis]
    helpers.save_subfigure_plot("best-and-worst-paths-%s-cdf.pdf" % capture_name,
                                axis_to_plot, no_legend=True)

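# compute_95th_percentile above returns the sorted sample at index
# floor(0.95 * n). numpy ships a library equivalent whose index arithmetic
# differs slightly (it interpolates over n - 1 gaps rather than indexing into
# n samples), so the two can disagree by one sample position:
#
#     percentile_95 = np.percentile(all_latencies, 95)
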