def make_scatter_plot(slice_idx_to_data, slice_idx_lower, slice_idx_upper, stats,
                      kernel=('wlst', 'logical_time', 5)):
    """Show a jittered scatter plot of kernel distances for a range of slices.

    Every kernel distance of a slice is drawn at ``x = slice index + jitter``,
    where the jitter is drawn uniformly from [-0.25, 0.25) so that points
    belonging to the same slice do not sit exactly on top of each other.

    Args:
        slice_idx_to_data: Mapping from slice index to a dict whose
            ``["kernel_distance"][kernel]`` entry is passed to
            ``flatten_distance_matrix``.
        slice_idx_lower: Position, within the sorted slice indices, of the
            first slice to plot. Ignored unless ``slice_idx_upper`` is also
            given.
        slice_idx_upper: Position, within the sorted slice indices, of the
            last slice to plot (inclusive). Ignored unless
            ``slice_idx_lower`` is also given.
        stats: Unused; kept so existing callers keep working.
        kernel: Key selecting which kernel's distance matrix to plot.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # Select slice indices
    all_slice_indices = sorted(slice_idx_to_data.keys())
    if slice_idx_lower is not None and slice_idx_upper is not None:
        slice_indices = all_slice_indices[slice_idx_lower:slice_idx_upper + 1]
    else:
        slice_indices = all_slice_indices

    # Only the selected slices are flattened and plotted; previously the
    # selection above was computed and then ignored.
    x_vals = []
    y_vals = []
    for slice_idx in slice_indices:
        distances = flatten_distance_matrix(
            slice_idx_to_data[slice_idx]["kernel_distance"][kernel])
        for distance in distances:
            x_vals.append(slice_idx + np.random.uniform(-0.25, 0.25))
            y_vals.append(distance)

    fig, ax = plt.subplots()
    ax.scatter(x_vals, y_vals)

    # X-axis stuff
    # NOTE(review): the labels skip "10"; presumably this mirrors the sweep
    # that produced the data -- confirm against the experiment setup.
    x_tick_labels = ["0", "20", "30", "40", "50", "60", "70", "80", "90", "100"]
    ax.set_xticks(list(range(len(x_tick_labels))))
    ax.set_xticklabels(x_tick_labels, rotation=45)
    ax.set_xlabel("% Messages Non-Deterministic")

    # Y-axis stuff
    ax.set_ylabel("Kernel Distance (Higher == Runs Less Similar)")

    # Plot title
    plt.title("Fraction of Messages Non-Deterministic vs. Kernel Distance")
    plt.show()
def get_distances_seq(slice_idx_to_data, slice_indices, kernel):
    """Return the flattened kernel distances of each requested slice.

    Args:
        slice_idx_to_data: Mapping from slice index to a dict holding a
            ``"kernel_distance"`` mapping keyed by kernel.
        slice_indices: Slice indices to extract, in the desired output order.
        kernel: Key of the kernel whose distance matrices are wanted.

    Returns:
        A list with one entry per slice index: the result of
        ``flatten_distance_matrix`` applied to that slice's distance matrix.
    """
    # Look every matrix up first, so a missing slice or kernel key is
    # reported before any flattening work starts.
    matrices = [
        slice_idx_to_data[slice_idx]["kernel_distance"][kernel]
        for slice_idx in slice_indices
    ]
    return [flatten_distance_matrix(matrix) for matrix in matrices]
def main(kdts_data_path, kernel_file_path, block_traffic_data_path=None,
         flagged_slices=None, kdts_ymax=None, mre_ymax=None,
         output="mini_amr_kdts.png", flagged_policy="kolmogorov_smirnov"):
    """Plot the kernel distance time series as one box per slice.

    Optionally highlights flagged slices in red and overlays the mean
    mesh-refinement block traffic on a second y-axis. When block traffic is
    given, Pearson and Spearman correlations between block traffic and the
    variance of the kernel distances are printed.

    Args:
        kdts_data_path: Path to a pickle mapping slice index to a dict with a
            ``"kernel_distance"`` mapping keyed by kernel.
        kernel_file_path: Path to a JSON kernel definition, converted to a
            key with ``kernel_json_to_key``.
        block_traffic_data_path: Optional path to a pickle holding
            ``"mesh_refinement_rate"`` and ``"mre_to_block_traffic"``.
        flagged_slices: Optional path to a pickle mapping anomaly-detection
            policy name to the flagged slice indices.
        kdts_ymax: Optional upper limit of the kernel distance y-axis.
        mre_ymax: Optional upper limit of the block traffic y-axis.
        output: Path the figure is saved to.
        flagged_policy: Key selecting which policy's flagged slices to read
            from the ``flagged_slices`` pickle.

    Returns:
        None. The figure is written to ``output``.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    # Read in kernel distance time series data
    with open(kdts_data_path, "rb") as infile:
        slice_idx_to_data = pkl.load(infile)

    # Read in kernel definition file
    with open(kernel_file_path, "r") as infile:
        kernel = json.load(infile)

    # Unpack kernel distance time series data
    slice_indices = sorted(slice_idx_to_data.keys())
    kernel_key = kernel_json_to_key(kernel)
    kernel_matrices = [
        slice_idx_to_data[i]["kernel_distance"][kernel_key]
        for i in slice_indices
    ]
    kdts_box_data = [flatten_distance_matrix(km) for km in kernel_matrices]
    kdts_box_positions = slice_indices

    # Configure figure
    figure_scale = 1.5
    figure_size = [dim * figure_scale for dim in (16, 9)]
    fig, kdts_ax = plt.subplots(figsize=figure_size)

    # Configure boxplot appearance
    box_width = 0.5
    box_props = {"alpha": 0.5}
    flier_props = {"marker": "+", "markersize": 4}

    # The legend is assembled from whatever actually gets drawn, so it never
    # refers to a handle that does not exist for the chosen options.
    legend_handles = []
    legend_labels = []

    def draw_boxes(data, positions, props, label):
        # Nothing to draw (and no handle to put in the legend) for an empty
        # group, e.g. when no slice was flagged.
        if not positions:
            return
        boxes = kdts_ax.boxplot(data,
                                widths=box_width,
                                positions=positions,
                                patch_artist=True,
                                showfliers=True,
                                boxprops=props,
                                flierprops=flier_props)
        legend_handles.append(boxes["boxes"][0])
        legend_labels.append(label)

    if flagged_slices is not None:
        with open(flagged_slices, "rb") as infile:
            flagged_indices = pkl.load(infile)[flagged_policy]
        non_flagged_box_positions = sorted(
            set(kdts_box_positions) - set(flagged_indices))
        flagged_box_positions = sorted(flagged_indices)
        # Positions double as list indices here, which assumes the slice
        # indices are 0..n-1 -- TODO confirm against the data producer.
        draw_boxes([kdts_box_data[i] for i in non_flagged_box_positions],
                   non_flagged_box_positions,
                   box_props,
                   "Kernel Distance Distrbutions")
        draw_boxes([kdts_box_data[i] for i in flagged_box_positions],
                   flagged_box_positions,
                   {"alpha": 0.5, "facecolor": "r"},
                   "Flagged Slices")
    else:
        # Create base kernel distance boxplot
        draw_boxes(kdts_box_data,
                   kdts_box_positions,
                   box_props,
                   "Kernel Distance Distrbutions")

    # Tick labels are shown every `mesh_refinement_rate` slices; 5 is the
    # fallback when no block traffic file supplies the real rate.
    mesh_refinement_rate = 5

    # Read in mesh refinement block traffic data and plot, if available
    if block_traffic_data_path is not None:
        with open(block_traffic_data_path, "rb") as infile:
            block_traffic_data = pkl.load(infile)

        # Unpack
        mesh_refinement_rate = block_traffic_data["mesh_refinement_rate"]
        mre_to_block_traffic = block_traffic_data["mre_to_block_traffic"]

        # Copy axis
        mre_ax = kdts_ax.twinx()

        # The first mesh refinement entry is skipped, both for the x
        # positions and for the traffic data.
        mre_data_positions = [
            (x * mesh_refinement_rate) + x - 1
            for x in range(1, len(mre_to_block_traffic))
        ]
        mre_box_data = mre_to_block_traffic[1:]
        mre_data = [np.mean(traffic) for traffic in mre_box_data]

        # Create MRE block traffic line plot
        mre_plot_handle = mre_ax.plot(mre_data_positions,
                                      mre_data,
                                      color="r",
                                      marker="o",
                                      linestyle="dashed",
                                      linewidth=2,
                                      markersize=12,
                                      label="Mesh Refinement Blocks Traffic")
        legend_handles.append(mre_plot_handle[0])
        legend_labels.append("Mesh Refinement Block Traffic")

        # Configure MRE y-axis appearance
        mre_ax.set_ylabel("Number of Blocks Transferred During Mesh Refinement")
        if mre_ymax is not None:
            mre_ax.set_ylim(0, mre_ymax)

        # Compute correlation coefficients between block traffic and kernel
        # distance: variance of the distances vs. median block traffic.
        kernel_distance_seq = [
            np.var(kdts_box_data[position]) for position in mre_data_positions
        ]
        block_traffic_seq = [np.median(traffic) for traffic in mre_box_data]

        # A correlation needs at least two points.
        if len(block_traffic_seq) >= 2:
            pearson_r, pearson_p = pearsonr(block_traffic_seq,
                                            kernel_distance_seq)
            spearman_r, spearman_p = spearmanr(block_traffic_seq,
                                               kernel_distance_seq)
            pearson_correlation_txt = "Pearson's r = {}, p = {}\n".format(
                np.round(pearson_r, 2), pearson_p)
            spearman_correlation_txt = "Spearman's ρ = {}, p = {}\n".format(
                np.round(spearman_r, 2), spearman_p)
            print(pearson_correlation_txt)
            print(spearman_correlation_txt)

    # Configure axes text appearance
    tick_label_fontdict = {"fontsize": 12}

    # Configure x-axis appearance
    x_ticks = slice_indices
    x_tick_labels = [
        str(x + 1) if (x + 1) % mesh_refinement_rate == 0 else ''
        for x in x_ticks
    ]
    kdts_ax.set_xticks(x_ticks)
    kdts_ax.set_xticklabels(x_tick_labels,
                            rotation=0,
                            fontdict=tick_label_fontdict)
    x_axis_padding = 5
    kdts_ax.set_xlim(-1 * x_axis_padding,
                     len(kdts_box_positions) + x_axis_padding)
    kdts_ax.set_xlabel("Slice Index")

    # Configure kernel distance time series y-axis appearance
    kdts_ax.set_ylabel("Kernel Distance (Higher == Runs Less Similar)")
    if kdts_ymax is not None:
        kdts_ax.set_ylim(0, kdts_ymax)

    # Configure legend appearance
    if legend_handles:
        kdts_ax.legend(legend_handles, legend_labels, loc="upper left")

    # Save figure
    plt.savefig(output, bbox_inches="tight", pad_inches=0.25)
def main(kdts_path, kernel_path, pattern, ymax):
    """Plot kernel distance against the percentage of wildcard receives.

    Draws one box per slice with the individual kernel distances overlaid as
    a scatter plot, annotates the figure with Pearson and Spearman
    correlation coefficients, and saves it as a PNG named after ``pattern``.

    Args:
        kdts_path: Path to a pickle mapping slice index to a dict with a
            ``"kernel_distance"`` mapping keyed by kernel.
        kernel_path: Path to a JSON kernel definition, converted to a key
            with ``kernel_json_to_key``.
        pattern: Communication pattern name used in the title and in the
            output file name, or None to omit it from both.
        ymax: Optional upper limit of the y-axis.

    Returns:
        None. The figure is written to the current working directory.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    # Load kernel distance time series
    with open(kdts_path, "rb") as infile:
        slice_idx_to_data = pkl.load(infile)

    # Load kernel definition
    with open(kernel_path, "r") as infile:
        kernel = json.load(infile)

    # Unpack kernel distance time series data
    slice_indices = sorted(slice_idx_to_data.keys())
    kernel_key = kernel_json_to_key(kernel)
    kernel_matrices = [
        slice_idx_to_data[i]["kernel_distance"][kernel_key]
        for i in slice_indices
    ]
    kernel_distances = [flatten_distance_matrix(km) for km in kernel_matrices]

    # Get scatter plot points
    scatter_x_vals, scatter_y_vals = get_scatter_plot_points(kernel_distances)

    # Package data for box plots: the i-th slice is drawn at x = i
    bp_positions = list(range(len(kernel_distances)))
    bp_data = kernel_distances

    # Specify appearance of boxes
    box_width = 0.5
    flierprops = {"marker": "+", "markersize": 4}
    boxprops = {"alpha": 1.0, "linewidth": 3, "color": "black"}

    # Specify appearance of scatter plot markers
    marker_size = 1
    marker_color = "lightblue"

    # Widescreen (16:9) figure
    figure_scale = 1.5
    figure_size = (figure_scale * 16, figure_scale * 9)
    fig, ax = plt.subplots(figsize=figure_size)

    # Create box plots
    ax.boxplot(bp_data,
               widths=box_width,
               positions=bp_positions,
               patch_artist=True,
               showfliers=False,
               boxprops=boxprops,
               flierprops=flierprops)

    # Overlay actual data points on same axis
    ax.scatter(scatter_x_vals, scatter_y_vals, s=marker_size, c=marker_color)

    # Plot annotation ( correlation coefficients )
    # The i-th slice is paired with the i-th fraction; zip stops at the
    # shorter of the two so fewer than 11 slices no longer raises IndexError.
    nd_fractions = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    nd_fraction_seq = []
    dist_seq = []
    for nd_fraction, distances in zip(nd_fractions, kernel_distances):
        for distance in distances:
            nd_fraction_seq.append(nd_fraction)
            dist_seq.append(distance)
    pearson_r, pearson_p = pearsonr(nd_fraction_seq, dist_seq)
    spearman_r, spearman_p = spearmanr(nd_fraction_seq, dist_seq)
    pearson_correlation_txt = "Pearson's r = {}, p = {}\n".format(
        np.round(pearson_r, 2), pearson_p)
    spearman_correlation_txt = "Spearman's rho = {}, p = {}\n".format(
        np.round(spearman_r, 2), spearman_p)
    print(pearson_correlation_txt)
    print(spearman_correlation_txt)
    annotation_txt = "".join([
        "Correlation Coefficients\n",
        pearson_correlation_txt,
        spearman_correlation_txt,
    ])
    ax.annotate(annotation_txt,
                xy=(0.45, 0.25),
                xycoords='axes fraction',
                fontsize=18,
                bbox=dict(boxstyle="square, pad=1", fc="w"))

    # Shared axis properties
    tick_label_fontdict = {"fontsize": 18}

    # X-axis properties
    x_tick_labels = [
        "0", "10", "20", "30", "40", "50", "60", "70", "80", "90", "100"
    ]
    x_tick_labels = [x + "%" for x in x_tick_labels]
    ax.set_xticks(list(range(len(x_tick_labels))))
    ax.set_xticklabels(x_tick_labels, rotation=0, fontdict=tick_label_fontdict)

    # Y-axis properties
    y_ticks = [0, 10, 20, 30, 40, 50, 60, 70]
    ax.set_yticks(y_ticks)
    ax.set_yticklabels([str(y) for y in y_ticks],
                       rotation=0,
                       fontdict=tick_label_fontdict)
    if ymax is not None:
        # Axes objects expose set_ylim(); ylim() only exists on pyplot.
        ax.set_ylim(0, ymax)

    # Axis labels
    axis_label_fontdict = {"fontsize": 18}
    ax.set_xlabel("Percentage of Wildcard Receives (i.e., using MPI_ANY_SOURCE)",
                  fontdict=axis_label_fontdict)
    ax.set_ylabel("Kernel Distance (Higher == Runs Less Similar)",
                  fontdict=axis_label_fontdict)

    # Annotate plot
    pattern_to_nice_name = {
        "message_race": "Message Race",
        "amg2013": "AMG2013",
        "mini_mcb_grid": "Mini-MCB Grid",
        "unstructured_mesh": "Unstructured Mesh"
    }
    if pattern is not None:
        # Unknown patterns fall back to their raw name instead of KeyError.
        plot_title = "Percentage of Wildcard Receives vs. Kernel Distance - Communication Pattern: {}".format(
            pattern_to_nice_name.get(pattern, pattern))
        save_path = "nd_fraction_vs_kernel_distance_{}.png".format(pattern)
    else:
        plot_title = "Percentage of Wildcard Receives vs. Kernel Distance"
        save_path = "nd_fraction_vs_kernel_distance.png"
    plt.title(plot_title, fontdict={"fontsize": 20})

    plt.savefig(save_path, bbox_inches="tight", pad_inches=0.25, dpi=600)
def main(kdts_path, nd_neighbor_fraction):
    """Plot kernel distance vs. wildcard-receive percentage (unstructured mesh).

    Loads the pickled kernel distance time series, draws one box per slice
    with the raw kernel distances overlaid as a scatter plot, prints the
    Pearson and Spearman correlation coefficients, and saves the figure as
    ``unstructured_mesh_example.png``.

    Args:
        kdts_path: Path to a pickle mapping slice index to a dict with a
            ``"kernel_distance"`` mapping keyed by kernel.
        nd_neighbor_fraction: Fraction of neighbors chosen
            non-deterministically; shown in the title as a percentage.

    Returns:
        None.
    """
    # Read in kdts data
    with open(kdts_path, "rb") as kdts_file:
        slice_idx_to_data = pkl.load(kdts_file)

    kernel = ('wlst', 'logical_time', 5)
    idx_to_distances = {}
    for slice_idx, slice_data in slice_idx_to_data.items():
        idx_to_distances[slice_idx] = flatten_distance_matrix(
            slice_data["kernel_distance"][kernel])

    # Package data for scatter plot
    scatter_x_vals, scatter_y_vals = get_scatter_plot_points(idx_to_distances)

    # Package data for box-plots, ordered by slice index
    bp_positions = sorted(idx_to_distances)
    bp_data = [idx_to_distances[slice_idx] for slice_idx in bp_positions]

    # Appearance of boxes and of the scatter plot markers
    box_style = dict(widths=0.5,
                     patch_artist=True,
                     showfliers=False,
                     boxprops={"alpha": 0.25},
                     flierprops={"marker": "+", "markersize": 4})
    marker_size = 6

    # Figure geometry
    aspect_ratio = "widescreen"
    figure_scale = 1.5
    base_width, base_height = (16, 9) if aspect_ratio == "widescreen" else (4, 3)
    fig, ax = plt.subplots(
        figsize=(figure_scale * base_width, figure_scale * base_height))

    # Create box plots
    bp = ax.boxplot(bp_data, positions=bp_positions, **box_style)

    # Overlay actual data points on same axis
    ax.scatter(scatter_x_vals, scatter_y_vals, s=marker_size)

    # Correlation coefficients: every distance of slice i is paired with the
    # i-th non-determinism fraction.
    nd_fractions = [0, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    nd_fraction_seq = []
    dist_seq = []
    for position, nd_fraction in enumerate(nd_fractions):
        for distance in idx_to_distances[position]:
            nd_fraction_seq.append(nd_fraction)
            dist_seq.append(distance)
    pearson_r, pearson_p = pearsonr(nd_fraction_seq, dist_seq)
    spearman_r, spearman_p = spearmanr(nd_fraction_seq, dist_seq)

    pearson_correlation_txt = "Pearson's r = {}, p = {}\n".format(
        np.round(pearson_r, 2), pearson_p)
    spearman_correlation_txt = "Spearman's rho = {}, p = {}\n".format(
        np.round(spearman_r, 2), spearman_p)
    print(pearson_correlation_txt)
    print(spearman_correlation_txt)

    # The annotation text is assembled but not drawn on the figure.
    annotation_txt = (
        "Kernel Distance vs. % Wildcard Receives: Correlation Coefficients\n"
        + pearson_correlation_txt
        + spearman_correlation_txt
    )
    annotation_font_size = 18

    # Tick labels
    tick_label_fontdict = {"fontsize": 12}
    x_tick_labels = ["0", "20", "30", "40", "50", "60", "70", "80", "90", "100"]
    ax.set_xticks(list(range(len(x_tick_labels))))
    ax.set_xticklabels(x_tick_labels, rotation=0, fontdict=tick_label_fontdict)
    ax.set_ylim(0, 175)

    # Axis labels
    axis_label_fontdict = {"fontsize": 18}
    ax.set_xlabel(
        "Percentage of Wildcard Receives (i.e., using MPI_ANY_SOURCE)",
        fontdict=axis_label_fontdict)
    ax.set_ylabel(
        "Kernel Distance (Higher == Runs Less Similar)",
        fontdict=axis_label_fontdict)

    # Plot Title
    nd_neighbor_percent = int(nd_neighbor_fraction * 100)
    plot_title = "Percentage of Wildcard Receives vs. Kernel Distance - Communication Pattern: Unstructured Mesh ({}% neighbors non-deterministically chosen )".format(
        nd_neighbor_percent)
    plt.title(plot_title, fontdict={"fontsize": 18})

    plt.savefig("unstructured_mesh_example.png",
                bbox_inches="tight",
                pad_inches=0.25)
def _flag_ruptures_change_points(estimator, kernel_distance_seq, policy_params,
                                 method_label):
    """Fit a ruptures estimator on the per-slice distances and flag change points.

    Args:
        estimator: Unfitted ruptures estimator (e.g. ``rpt.Binseg(...)``).
        kernel_distance_seq: Sequence of per-slice distance matrices.
        policy_params: Dict with ``"n_change_points"``, ``"penalty"`` and
            ``"epsilon"`` entries.
        method_label: Name of the method, used in the error message.

    Returns:
        List of flagged slice indices (each predicted change point minus 1).

    Raises:
        ValueError: If the number of change points is ``"unknown"`` and the
            penalty/epsilon flags are not exactly one True and one False.
    """
    n_change_points = policy_params["n_change_points"]
    penalty = policy_params["penalty"]
    epsilon = policy_params["epsilon"]

    # Get list of distance distributions
    distance_distribution_seq = [
        get_flat_distances(distance_mat) for distance_mat in kernel_distance_seq
    ]
    if not distance_distribution_seq:
        # No slices means nothing can be flagged.
        return []

    # Get some properties about the distances needed by Ruptures
    n_distributions = len(distance_distribution_seq)
    dim = len(distance_distribution_seq[-1])
    sigma = np.std([
        distance
        for distances in distance_distribution_seq
        for distance in distances
    ])

    # Make into ndarray for ruptures: one row per slice
    signal = np.array([np.array(d) for d in distance_distribution_seq])
    algo = estimator.fit(signal)

    # Find change-points
    if n_change_points == "unknown":
        if (penalty, epsilon) == (True, False):
            penalty_value = np.log(n_distributions) * dim * sigma**2
            change_points = algo.predict(pen=penalty_value)
        elif (penalty, epsilon) == (False, True):
            threshold = 3 * n_distributions * sigma**2
            change_points = algo.predict(epsilon=threshold)
        else:
            raise ValueError(
                "Invalid policy for {} change-point detection: {}".format(
                    method_label, policy_params))
    else:
        change_points = algo.predict(n_bkps=n_change_points)

    # NOTE(review): ruptures appears to report each breakpoint as the index
    # just past the end of a segment, hence the -1 -- confirm in its docs.
    return [cp - 1 for cp in change_points]


def detect_anomalies(kernel_distance_seq, policy):
    """Return the indices of the slices flagged by an anomaly detection policy.

    Args:
        kernel_distance_seq: Sequence of per-slice kernel distance matrices,
            ordered by slice.
        policy: Dict with a ``"name"`` entry selecting the policy and a
            ``"params"`` entry holding that policy's parameters. Supported
            names: ``"naive_max"``, ``"increasing_median"``,
            ``"kolmogorov_smirnov"``, ``"median_exceeds_threshold"``,
            ``"random"``, ``"all"``, ``"ruptures_binary_segmentation"`` and
            ``"ruptures_window_based"``.

    Returns:
        List of flagged slice indices (positions in ``kernel_distance_seq``).

    Raises:
        NotImplementedError: If the policy name is not recognized.
        ValueError: If a ruptures policy has an inconsistent penalty/epsilon
            setting, or if the ``"random"`` policy asks for more samples than
            there are slices.
    """
    # Unpack policy
    policy_name = policy["name"]
    policy_params = policy["params"]

    # Do a truly naive anomaly detection policy where we just define the slice
    # containing the max kernel distance as anomalous and all others as not
    # anomalous. This is not really "anomaly detection" in any meaningful sense
    # But it suffices for testing the basic workflow
    if policy_name == "naive_max":
        max_dist_slice_idx = 0
        max_dist = 0
        for slice_idx, distance_mat in enumerate(kernel_distance_seq):
            slice_max = max(get_flat_distances(distance_mat))
            if slice_max > max_dist:
                max_dist = slice_max
                max_dist_slice_idx = slice_idx
        return [max_dist_slice_idx]

    # Detect anomalies based on whether the median kernel distance increases
    # from slice to slice by more than a threshold
    elif policy_name == "increasing_median":
        threshold = policy_params["threshold"]
        flagged_slice_indices = []
        prev_median_distance = 0
        for slice_idx, distance_mat in enumerate(kernel_distance_seq):
            curr_median_distance = np.median(get_flat_distances(distance_mat))
            if curr_median_distance - prev_median_distance > threshold:
                flagged_slice_indices.append(slice_idx)
            prev_median_distance = curr_median_distance
        return flagged_slice_indices

    # Flag interior slices whose distance distribution differs significantly
    # (two-sample KS test) from both the previous and the next slice
    elif policy_name == "kolmogorov_smirnov":
        thresh = 0.0001
        flagged_slice_indices = []
        for slice_idx in range(1, len(kernel_distance_seq) - 1):
            prev_dist = flatten_distance_matrix(kernel_distance_seq[slice_idx - 1])
            curr_dist = flatten_distance_matrix(kernel_distance_seq[slice_idx])
            next_dist = flatten_distance_matrix(kernel_distance_seq[slice_idx + 1])
            _, p_val_prev = ks_2samp(prev_dist, curr_dist)
            _, p_val_next = ks_2samp(next_dist, curr_dist)
            if p_val_prev < thresh and p_val_next < thresh:
                flagged_slice_indices.append(slice_idx)
        return flagged_slice_indices

    # Flag slices if the median kernel distance exceeds a user-supplied
    # threshold
    elif policy_name == "median_exceeds_threshold":
        threshold = policy_params["threshold"]
        return [
            slice_idx
            for slice_idx, distance_mat in enumerate(kernel_distance_seq)
            if np.median(get_flat_distances(distance_mat)) > threshold
        ]

    # Randomly choose slices. This isn't really an anomaly detection policy, but
    # we use it to check whether the distribution of callstacks from a random
    # sample of slices looks different than the distribution of callstacks from
    # the flagged slices
    elif policy_name == "random":
        n_samples = policy_params["n_samples"]
        n_slices = len(kernel_distance_seq)
        if n_samples > n_slices:
            # Drawing distinct indices could never finish in this case.
            raise ValueError(
                "Cannot sample {} distinct slices out of {}".format(
                    n_samples, n_slices))
        flagged_slice_indices = set()
        while len(flagged_slice_indices) < n_samples:
            # generate uniform random number between 0 and n_slices-1
            flagged_slice_indices.add(
                np.random.randint(0, n_slices, size=1)[0])
        return list(flagged_slice_indices)

    elif policy_name == "all":
        return list(range(len(kernel_distance_seq)))

    elif policy_name == "ruptures_binary_segmentation":
        estimator = rpt.Binseg(model=policy_params["model"])
        return _flag_ruptures_change_points(estimator,
                                            kernel_distance_seq,
                                            policy_params,
                                            "binary-segmentation")

    elif policy_name == "ruptures_window_based":
        estimator = rpt.Window(width=policy_params["width"],
                               model=policy_params["model"])
        return _flag_ruptures_change_points(estimator,
                                            kernel_distance_seq,
                                            policy_params,
                                            "window-based")

    else:
        raise NotImplementedError(
            "Anomaly detection policy: {} is not implemented".format(policy_name))
def _nd_fraction_steps(nd_start, nd_iter, nd_end):
    """Return the swept non-determinism fractions, each rounded to 2 decimals."""
    if nd_iter == 0:
        step_count = 0
    else:
        # The small epsilon absorbs float noise: 0.3 / 0.1 evaluates to
        # 2.9999999999999996, which a bare int() would truncate to 2 and so
        # silently drop the last step of the sweep.
        step_count = int((nd_end - nd_start) / nd_iter + 1e-9)
    return [
        round(nd_start + (nd_iter * step_num), 2)
        for step_num in range(step_count + 1)
    ]


def main(kdts_path, pattern, output, kernel_path, nd_start, nd_iter, nd_end, nd_frac):
    """Draw a violin plot of kernel distance per non-determinism percentage.

    One violin is drawn per slice of the kernel distance time series. The
    x tick labels are the non-determinism percentages of the sweep defined
    by ``nd_start``, ``nd_iter`` and ``nd_end``. When more than one data
    point is available, the Pearson and Spearman correlations between
    non-determinism fraction and kernel distance are printed.

    Args:
        kdts_path: Path to a pickle mapping slice index to a dict with a
            ``"kernel_distance"`` mapping keyed by kernel.
        pattern: Communication pattern name. Currently unused because the
            plot has no title; kept for interface compatibility.
        output: Output path without extension; ``".png"`` is appended.
        kernel_path: Path to a JSON kernel definition, converted to a key
            with ``kernel_json_to_key``.
        nd_start: First non-determinism fraction of the sweep.
        nd_iter: Step between consecutive fractions; 0 means a single step.
        nd_end: Last non-determinism fraction of the sweep.
        nd_frac: Currently unused; kept for interface compatibility.

    Returns:
        None. The figure is written to ``"<output>.png"``.
    """
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    # Read in kdts data
    with open(kdts_path, "rb") as infile:
        slice_idx_to_data = pkl.load(infile)

    with open(kernel_path, "r") as infile:
        kernel = json.load(infile)

    # Unpack kernel distance time series data
    slice_indices = sorted(slice_idx_to_data.keys())
    kernel_key = kernel_json_to_key(kernel)
    kernel_matrices = [
        slice_idx_to_data[i]["kernel_distance"][kernel_key]
        for i in slice_indices
    ]
    idx_to_distances = [flatten_distance_matrix(km) for km in kernel_matrices]

    # Only the y values are needed below, to size the y-axis ticks
    scatter_x_vals, scatter_y_vals = get_scatter_plot_points(idx_to_distances)

    # Package data for the violins: the i-th slice is drawn at x = i
    bp_positions = list(range(len(idx_to_distances)))
    bp_data = idx_to_distances

    # Widescreen (16:9) figure
    figure_scale = 1.5
    figure_size = (figure_scale * 16, figure_scale * 9)
    fig, ax = plt.subplots(figsize=figure_size)

    # Create violin plots
    violins = ax.violinplot(bp_data,
                            widths=0.8,
                            positions=bp_positions,
                            showmedians=True,
                            showextrema=True)
    for body in violins['bodies']:
        body.set_facecolor('tab:olive')
        body.set_edgecolor('black')
        body.set_alpha(1)
    for part in ('cbars', 'cmins', 'cmaxes'):
        violins[part].set_linewidths(2.5)
        violins[part].set_edgecolors('black')
    violins['cmedians'].set_linewidths(3.5)
    violins['cmedians'].set_edgecolors('black')

    ax.set_ylim(bottom=0)

    # Correlation coefficients: every distance of the i-th slice is paired
    # with the i-th fraction; zip stops at the shorter sequence so a sweep
    # longer than the data no longer raises IndexError.
    nd_fractions = _nd_fraction_steps(nd_start, nd_iter, nd_end)
    nd_fraction_seq = []
    dist_seq = []
    for nd_fraction, distances in zip(nd_fractions, idx_to_distances):
        for distance in distances:
            nd_fraction_seq.append(nd_fraction)
            dist_seq.append(distance)

    if len(nd_fraction_seq) > 1:
        pearson_r, pearson_p = pearsonr(nd_fraction_seq, dist_seq)
        spearman_r, spearman_p = spearmanr(nd_fraction_seq, dist_seq)
        pearson_correlation_txt = "Your Pearson's r value = {}\n".format(
            np.round(pearson_r, 2))
        pearson_p_txt = "It's corresponding p value = {}\n".format(pearson_p)
        spearman_correlation_txt = "Your Spearman's ρ value = {}\n".format(
            np.round(spearman_r, 2))
        spearman_p_txt = "It's corresponding p value = {}\n".format(spearman_p)
        print(pearson_correlation_txt)
        print(pearson_p_txt)
        print("\n")
        print(spearman_correlation_txt)
        print(spearman_p_txt)

    # Tick labels
    tick_label_fontdict = {"fontsize": 16}
    # round() before int(): 100 * 0.29 is 28.999999999999996 in floating
    # point, which a bare int() would label as "28".
    x_tick_labels = [
        str(int(round(100 * nd_fraction))) for nd_fraction in nd_fractions
    ]
    ax.set_xticks(list(range(len(x_tick_labels))))
    ax.set_xticklabels(x_tick_labels, rotation=0, fontdict=tick_label_fontdict)
    # Ticks every 10 units, up to the first tick above the largest distance
    y_ticks = list(range(0, int(max(scatter_y_vals)) + 11, 10))
    ax.set_yticks(y_ticks)
    ax.set_yticklabels([str(y) for y in y_ticks],
                       rotation=0,
                       fontdict=tick_label_fontdict)

    # Axis labels
    axis_label_fontdict = {"fontsize": 20}
    ax.set_xlabel("Percentage of Message Non-Determinism in Application",
                  fontdict=axis_label_fontdict)
    ax.set_ylabel("Kernel Distance (Higher == Runs Less Similar)",
                  fontdict=axis_label_fontdict)

    # No plot title is drawn.
    plt.savefig("{}.png".format(output), bbox_inches="tight", pad_inches=0.25)