def two_validate(a, b, c, d):
    """Compare two weighted 1-D samples with two statistical distances.

    Args:
        a: Values of the first distribution.
        b: Values of the second distribution.
        c: Weights belonging to the values in ``a``.
        d: Weights belonging to the values in ``b``.

    Returns:
        A tuple ``(emd, energy)`` holding the Wasserstein (earth mover's)
        distance and the energy distance between the two distributions.
    """
    emd = wasserstein_distance(a, b, u_weights=c, v_weights=d)
    # Weights are passed by keyword so each stays attached to its own sample;
    # the energy distance used to receive them in swapped order (d, c).
    energy = energy_distance(a, b, u_weights=c, v_weights=d)
    return emd, energy
def calculatePathDistance(self, pathA, pathB):
    """Return the energy distance between two paths.

    The course names of both paths are collected with
    ``abstract.extractAllCourseNames``; ``abstract.createExtendedLookUpList``
    then turns each path into a look-up list over those names, and the two
    lists are compared with ``energy_distance``.
    """
    all_course_names = abstract.extractAllCourseNames([pathA, pathB])
    lookups = abstract.createExtendedLookUpList(pathA, pathB, all_course_names)
    lookup_a, lookup_b = lookups
    return energy_distance(lookup_a, lookup_b)
def time_energy_distance(self, n_size):
    """Run one weighted ``stats.energy_distance`` call on the stored samples.

    The result is discarded and nothing is returned; ``n_size`` is not read
    here (presumably it parameterises how the instance data was set up --
    confirm against the setup code).
    """
    stats.energy_distance(
        self.u_values,
        self.v_values,
        u_weights=self.u_weights,
        v_weights=self.v_weights,
    )
def get_energy_distance(path, save_path, seg_name):
    """Write per-image energy distances between ground truth and a segmentation.

    For every file name found in ``<path>/Ground_Truth`` the ground-truth
    mask, the mask of the same name in ``<path>/<seg_name>`` and the raw image
    in ``<path>/Original`` are read as greyscale. Both masks are labelled and,
    for each descriptor, the distribution over ground-truth objects is
    compared with the distribution over segmented objects using
    ``energy_distance``.

    Args:
        path: Directory containing the ``Ground_Truth``, ``Original`` and
            ``seg_name`` sub-folders.
        save_path: Directory that receives ``<seg_name>_EnergyDistance.csv``.
        seg_name: Name of the sub-folder holding the segmentation to score.

    Returns:
        None. One CSV row per image is written to ``save_path``.
    """
    gt_folder_path = os.path.join(path, "Ground_Truth")
    raw_folder_path = os.path.join(path, "Original")
    seg_folder_path = os.path.join(path, seg_name)

    gt_image_list = os.listdir(gt_folder_path)

    columns = [
        "Image", "Area", "Eccentricity", "Aspect Ratio", "Perimeter",
        "Solidity", "Number of branches", "Branch length",
        "Total branch length", "Curvature index", "Mean intensity"
    ]

    # Region-property descriptors, keyed by the CSV column they fill.
    region_descriptors = {
        "Area": lambda region: region.area,
        "Eccentricity": lambda region: region.eccentricity,
        "Aspect Ratio": lambda region: (region.major_axis_length
                                        / region.minor_axis_length),
        "Perimeter": lambda region: region.perimeter,
        "Solidity": lambda region: region.solidity,
        "Mean intensity": lambda region: region.mean_intensity,
    }
    # Column order matches the order of the values get_branch_meas returns.
    branch_columns = ("Number of branches", "Branch length",
                      "Total branch length", "Curvature index")

    distances = {column: [] for column in columns[1:]}

    for image in gt_image_list:
        print(image)

        gt_file = os.path.join(gt_folder_path, image)
        seg_file = os.path.join(seg_folder_path, image)

        gt = cv2.imread(gt_file, cv2.IMREAD_GRAYSCALE)
        seg = cv2.imread(seg_file, cv2.IMREAD_GRAYSCALE)
        org = cv2.imread(os.path.join(raw_folder_path, image),
                         cv2.IMREAD_GRAYSCALE)

        # Label the masks and measure every object against the raw image.
        # NOTE(review): the `coordinates` keyword is only accepted by some
        # scikit-image releases -- confirm against the pinned version.
        gt_reg_props = regionprops(label_image=label(gt),
                                   intensity_image=org,
                                   coordinates='xy')
        seg_reg_props = regionprops(label_image=label(seg),
                                    intensity_image=org,
                                    coordinates='xy')

        for column, measure in region_descriptors.items():
            gt_values = [measure(region) for region in gt_reg_props]
            seg_values = [measure(region) for region in seg_reg_props]
            distances[column].append(energy_distance(gt_values, seg_values))

        # Branch descriptors are measured from the mask files themselves.
        gt_nb, gt_bl, gt_tbl, gt_ci = get_branch_meas(gt_file)
        seg_nb, seg_bl, seg_tbl, seg_ci = get_branch_meas(seg_file)
        branch_pairs = ((gt_nb, seg_nb), (gt_bl, seg_bl),
                        (gt_tbl, seg_tbl), (gt_ci, seg_ci))
        for column, (gt_values, seg_values) in zip(branch_columns,
                                                   branch_pairs):
            distances[column].append(energy_distance(gt_values, seg_values))

    df = pd.DataFrame(columns=columns)
    df["Image"] = gt_image_list
    for column in columns[1:]:
        df[column] = distances[column]

    # raw data
    df.to_csv(os.path.join(save_path, seg_name + "_EnergyDistance.csv"))
def get_energy_distance(
        path, save_path, seg_name,
        image_list_dir="C:/Users/Christian/Desktop/Fourth_CV/Complete_images/MitoSegNet"):
    """Write per-image shape-descriptor energy distances to a CSV file.

    Ground-truth masks are read from ``path + "Ground_Truth"`` and segmented
    masks from ``path + seg_name``; both are labelled, and for each shape
    descriptor the ground-truth and segmented object distributions are
    compared with ``energy_distance``.

    Args:
        path: Prefix of the data folders. It is concatenated directly with
            the folder names, so it must end with a path separator.
        save_path: Prefix of the output file; the file written is
            ``save_path + seg_name + "_EnergyDistance.csv"``.
        seg_name: Name of the folder holding the segmentation to score.
        image_list_dir: Directory whose listing supplies the image file names
            used for BOTH the ground-truth and the segmentation folder. The
            default is the machine-specific directory that used to be
            hard-coded in the body and is kept only for backward
            compatibility. Pass ``None`` to pair the listings of the
            ground-truth and segmentation folders instead.

    Returns:
        None. One CSV row per image is written.
    """
    gt_folder_path = path + "Ground_Truth"
    seg_folder_path = path + seg_name

    if image_list_dir is None:
        gt_image_list = os.listdir(gt_folder_path)
        seg_image_list = os.listdir(seg_folder_path)
    else:
        # A single listing names the files in both folders.
        gt_image_list = os.listdir(image_list_dir)
        seg_image_list = gt_image_list

    columns = [
        "Image", "Area", "Eccentricity", "Aspect Ratio", "Perimeter",
        "Solidity"
    ]

    # Shape descriptors, keyed by the CSV column they fill.
    shape_descriptors = {
        "Area": lambda region: region.area,
        "Eccentricity": lambda region: region.eccentricity,
        "Aspect Ratio": lambda region: (region.major_axis_length
                                        / region.minor_axis_length),
        "Perimeter": lambda region: region.perimeter,
        "Solidity": lambda region: region.solidity,
    }
    distances = {column: [] for column in shape_descriptors}

    for gt_image, seg_image in zip(gt_image_list, seg_image_list):
        print(gt_image)

        gt = cv2.imread(gt_folder_path + "/" + gt_image,
                        cv2.IMREAD_GRAYSCALE)
        seg = cv2.imread(seg_folder_path + "/" + seg_image,
                         cv2.IMREAD_GRAYSCALE)

        # Label the masks and measure every labelled object.
        gt_reg_props = regionprops(label(gt))
        seg_reg_props = regionprops(label(seg))

        # Compare the shape descriptor distributions.
        for column, measure in shape_descriptors.items():
            gt_values = [measure(region) for region in gt_reg_props]
            seg_values = [measure(region) for region in seg_reg_props]
            distances[column].append(energy_distance(gt_values, seg_values))

    df = pd.DataFrame(columns=columns)
    df["Image"] = gt_image_list
    for column in columns[1:]:
        df[column] = distances[column]

    # raw data
    df.to_csv(save_path + seg_name + "_EnergyDistance.csv")
def energy_dist(data1, data2):
    """Print the energy distance between two samples and return it as text.

    The inputs are handed to ``energy_distance`` unchanged (no flattening).
    """
    distance_text = str(energy_distance(data1, data2))
    print(distance_text)
    return distance_text
def distance_function(data, metric="default", normalize=None):
    """Calculate the distance between two distributions.

    Parameters
    ----------
    data : sequence of length 4
        ``(hist_a, hist_b, samples_a, samples_b)``. Each ``hist_*`` is itself
        indexable as ``(counts, bin_edges)``; ``samples_*`` are arrays that
        provide ``.shape`` and ``.mean()``.
    metric : str
        Name of the distance measure. ``"default"`` is the Euclidean distance
        between the two count vectors. ``"wasserstein"`` and ``"energy"`` are
        evaluated on the bin centres weighted by the counts;
        ``"meandistance"`` and ``"averagedistance"`` use the raw samples; all
        other measures compare the count vectors.
    normalize : optional
        Currently unused; accepted for interface compatibility.

    Returns
    -------
    The distance computed by the selected measure.

    Raises
    ------
    ValueError
        If the two count vectors or the two sample arrays differ in shape, or
        if ``metric`` is not a known measure.
    """
    if data[0][0].shape != data[1][0].shape:
        raise ValueError(
            " Histograms a and b not the same size a:{0} b:{1}".format(
                data[0][0].shape[0], data[1][0].shape[0]))
    if data[2].shape != data[3].shape:
        raise ValueError(
            " input data a and b not the same size a:{0} b:{1}".format(
                data[2].shape[0], data[3].shape[0]))

    hista = data[0][0]
    histb = data[1][0]
    binsa = data[0][1]
    binsb = data[1][1]

    def bin_centers(edges):
        """Return the midpoint of every pair of neighbouring bin edges."""
        return 0.5 * (edges[1:] + edges[:-1])

    # Every entry is a zero-argument callable so that only the selected
    # measure is evaluated (and only its helper names are looked up).
    measures = {
        "default": lambda: distance.euclidean(hista, histb),
        "euclidean": lambda: distance.euclidean(hista, histb),
        "manhattan": lambda: distance.cityblock(hista, histb),
        "nonintersection": lambda: non_intersection(hista, histb),
        # Kullback-Leibler divergence D(P || Q) for discrete distributions.
        "kullbackleiber": lambda: stats.entropy(hista, histb),
        "bhattacharyya": lambda: bhattacharyya(hista, histb),
        "matusita": lambda: matusita(hista, histb),
        "meandistance": lambda: np.abs(data[2].mean() - data[3].mean()),
        "averagedistance": lambda: average_distance(
            data[2], data[3], metric_name="cityblock", chunksize=10**2),
        "jensenshannon": lambda: distance.jensenshannon(hista, histb),
        "correlation": lambda: distance.correlation(hista, histb),
        "braycurtis": lambda: distance.braycurtis(hista, histb),
        "hellinger": lambda: hellinger(hista, histb),
        "wasserstein": lambda: stats.wasserstein_distance(
            bin_centers(binsa), bin_centers(binsb),
            u_weights=hista, v_weights=histb),
        "energy": lambda: stats.energy_distance(
            bin_centers(binsa), bin_centers(binsb),
            u_weights=hista, v_weights=histb),
        "kolmogorovsmirnov": lambda: stats.ks_2samp(hista, histb).statistic,
        "additivechi2": lambda: metrics.pairwise.additive_chi2_kernel(
            np.atleast_2d(hista), np.atleast_2d(histb))[0][0],
        "chi2_kernel": lambda: metrics.pairwise.chi2_kernel(
            np.atleast_2d(hista), np.atleast_2d(histb))[0][0],
        "MPDA": lambda: minimum_difference_pair_assignments(hista, histb),
    }

    try:
        compute = measures[metric]
    except (KeyError, TypeError):
        # TypeError covers an unhashable `metric`, which is equally unknown.
        raise ValueError(
            "unknown distance measure {0}".format(metric)) from None
    return compute()
def match_histograms(self, cur_all_model="all"):
    """Compare the query video's histograms with every stored video.

    Depending on ``config.model`` (and, when that is ``"all"``, on
    ``cur_all_model``) the greyscale, RGB or HSV histogram files of the query
    video are loaded and compared with the files saved for every video
    returned by ``get_video_filenames("../footage/")``:

    * greyscale / RGB: ``cv2.compareHist`` with each method code in
      ``self.histcmp_methods`` (RGB scores are the mean of the three
      channels);
    * HSV: SciPy's ``wasserstein_distance`` / ``energy_distance``, summed
      over all hue/saturation slices, for each name in
      ``self.histcmp_3d_methods``.

    For every method the scores are printed as a console table and written
    to a CSV file under ``../results/csv/``; the best-matching video is
    appended to ``self.results_array`` as many times as its model's entry in
    ``self.histogram_comparison_weigths``.

    :param cur_all_model: the current histogram model when operating with
        all 3 models
    :return: None
    """
    model = config.model
    use_gray = model == "gray" or (cur_all_model == "gray" and model == "all")
    use_rgb = model == "rgb" or (cur_all_model == "rgb" and model == "all")
    use_hsv = model == "hsv" or (cur_all_model == "hsv" and model == "all")

    def load_channel(video, channel):
        """Load the saved float32 histogram of one channel of *video*."""
        return np.loadtxt(
            "../histogram_data/{}/hist-{}.txt".format(video, channel),
            dtype=np.float32,
            unpack=False)

    def load_hsv(video):
        """Load the saved HSV histogram of *video* as an (8, 12, 3) array."""
        hsv_data = np.loadtxt(
            "../histogram_data/{}/hist-hsv.txt".format(video))
        return hsv_data.reshape((8, 12, 3))

    def open_results_csv(method_name):
        """Open the CSV results file for one comparison method."""
        if model == "all":
            csv_path = "../results/csv/{}-{}-{}.csv".format(
                model, cur_all_model, method_name)
        else:
            csv_path = "../results/csv/{}-{}.csv".format(model, method_name)
        # newline='' lets the csv module control line endings; without it
        # every row is followed by a blank line on Windows.
        return open(csv_path, 'w', newline='')

    # variables used for finding the match to the recorded video
    video_match = ""
    video_match_value = 0

    # get histogram for the recorded video to match
    query_histogram = dict()
    if use_gray:
        query_histogram = {'gray': load_channel(self.file_name, 'gray')}
    elif use_rgb:
        query_histogram = {
            channel: load_channel(self.file_name, channel)
            for channel in ('b', 'g', 'r')
        }
    elif use_hsv:
        query_histogram = {'hsv': load_hsv(self.file_name)}

    # compare query histogram with each DB video histogram
    print("\n{} Histogram Comparison Results:\n".format(
        _get_chosen_model_string(cur_all_model)))

    method = ""
    csv_field_names = ["video", "score"]

    # OpenCV's compareHist handles the greyscale and RGB histograms
    if use_rgb or use_gray:
        method_names = {
            0: "CORRELATION",
            1: "CHI-SQUARE",
            2: "INTERSECTION",
            3: "HELLINGER",
        }
        for m in self.histcmp_methods:
            # an unrecognised code keeps the previous method label
            method = method_names.get(m, method)

            with open_results_csv(method) as csv_file:
                writer = csv.DictWriter(csv_file,
                                        fieldnames=csv_field_names)
                writer.writeheader()

                table_data = list()
                for i, file in enumerate(get_video_filenames("../footage/")):
                    comparison = 0
                    if use_gray:
                        comparison = cv2.compareHist(
                            query_histogram['gray'],
                            load_channel(file, 'gray'), m)
                    elif use_rgb:
                        channel_scores = [
                            cv2.compareHist(query_histogram[channel],
                                            load_channel(file, channel), m)
                            for channel in ('b', 'g', 'r')
                        ]
                        comparison = sum(channel_scores) / 3

                    table_data.append([file, round(comparison, 5)])
                    writer.writerow({
                        "video": file,
                        "score": round(comparison, 5)
                    })

                    if i == 0:
                        video_match = file
                        video_match_value = comparison
                    # Higher score = better match
                    # (Correlation and Intersection)
                    elif m in [0, 2] and comparison > video_match_value:
                        video_match = file
                        video_match_value = comparison
                    # Lower score = better match
                    # (Chi-square, Hellinger and the remaining codes)
                    elif (m in [1, 3, 4, 5]
                          and comparison < video_match_value):
                        video_match = file
                        video_match_value = comparison

            # append video match found to results list (using weights)
            if cur_all_model in ("gray", "rgb"):
                weight = self.histogram_comparison_weigths[cur_all_model]
                self.results_array.extend([video_match] * weight)

            print_terminal_table(table_data, method)
            print("{} {} match found: ".format(
                _get_chosen_model_string(cur_all_model), method) +
                  "\x1b[1;31m" + video_match + "\x1b[0m" + "\n\n")

    # SciPy's statistical distances handle the HSV histograms
    # (compareHist does not work with 3d arrays)
    elif model == "hsv" or model == "all":
        method_names = {
            "earths_mover_distance": "EARTH'S MOVER DISTANCE",
            "energy_distance": "ENERGY DISTANCE",
        }
        slice_distances = {
            "EARTH'S MOVER DISTANCE": wasserstein_distance,
            "ENERGY DISTANCE": energy_distance,
        }
        for m in self.histcmp_3d_methods:
            method = method_names.get(m, method)
            slice_distance = slice_distances.get(method)

            with open_results_csv(method) as csv_file:
                writer = csv.DictWriter(csv_file,
                                        fieldnames=csv_field_names)
                writer.writeheader()

                table_data = list()
                for i, file in enumerate(get_video_filenames("../footage/")):
                    dbvideo_hsv_histogram = load_hsv(file)

                    # NOTE(review): when config.model is "all" and
                    # cur_all_model is not "hsv", no 'hsv' query histogram
                    # was loaded above and the lookup below raises KeyError.
                    comparison = 0
                    for h in range(0, self.bins[0]):  # hue bins
                        for s in range(0, self.bins[1]):  # saturation bins
                            query_slice = query_histogram['hsv'][h][s]
                            dbvideo_slice = dbvideo_hsv_histogram[h][s]
                            if slice_distance is not None:
                                comparison += slice_distance(
                                    query_slice, dbvideo_slice)

                    table_data.append([file, round(comparison, 5)])
                    writer.writerow({
                        "video": file,
                        "score": round(comparison, 5)
                    })

                    # lower distance = better match
                    if i == 0 or comparison < video_match_value:
                        video_match = file
                        video_match_value = comparison

            # append video match found to results list (using weights)
            weight = self.histogram_comparison_weigths['hsv']
            self.results_array.extend([video_match] * weight)

            print_terminal_table(table_data, method)
            print("{} {} Match found: ".format(
                _get_chosen_model_string(cur_all_model), method) +
                  "\x1b[1;31m" + video_match + "\x1b[0m" + "\n\n")
# %%

# %%

# %%
# Compare one column of the first dataset against the same column of the
# second and third datasets.
col_ind = 1
sample0 = np.array(fed_fast_dataset[0].iloc[:, col_ind], dtype=float)
sample1 = np.array(fed_fast_dataset[1].iloc[:, col_ind], dtype=float)
sample2 = np.array(fed_fast_dataset[2].iloc[:, col_ind], dtype=float)
wdist1 = scstats.wasserstein_distance(sample0, sample1)
wdist2 = scstats.wasserstein_distance(sample0, sample2)

# %%
edist1 = scstats.energy_distance(sample0, sample1)
edist2 = scstats.energy_distance(sample0, sample2)

# %%
edist1

# %%
edist2

# %%
kolmogorov_distance(sample0, sample1)


# %%
def calculate_distance_metrics(dataframe1, dataframe2, column_index,
                               metric_functions, metric_names):
    """Evaluate several distance metrics on one column of two dataframes.

    Args:
        dataframe1: First dataframe.
        dataframe2: Second dataframe.
        column_index: Positional index of the column to compare.
        metric_functions: Callables taking the two float arrays and
            returning a distance.
        metric_names: Labels for the callables, in the same order.

    Returns:
        A dataframe with one row per metric name holding the metric value.
    """
    # `.iloc[:, i]` selects the i-th COLUMN; a bare `.iloc[i]` would pick the
    # i-th row instead.
    data1 = np.array(dataframe1.iloc[:, column_index], float)
    data2 = np.array(dataframe2.iloc[:, column_index], float)
    metrics_dict = {
        name: metric(data1, data2)
        for metric, name in zip(metric_functions, metric_names)
    }
    metrics_df = pd.DataFrame.from_dict(metrics_dict, orient="index")
    return metrics_df
def log_energy_dist(points_1, points_2):
    """Return ``log(1 + d)`` where ``d`` is the energy distance of the inputs."""
    shifted_distance = 1 + energy_distance(points_1, points_2)
    return np.log(shifted_distance)
def energy_coefficient(x, y):
    """Return the energy coefficient of two 1-D samples.

    The coefficient is ``D**2 / (2 * E|X - Y|)``, where ``D**2`` is the
    squared energy distance ``2 E|X - Y| - E|X - X'| - E|Y - Y'|``. It is 0
    for identical distributions and at most 1.

    Args:
        x: Values of the first sample.
        y: Values of the second sample.

    Returns:
        The coefficient as a float; 0.0 when every value in both samples is
        the same (the ratio would otherwise be 0/0).
    """
    # SciPy returns D, the square ROOT of the squared energy distance, so it
    # has to be squared before forming the ratio.
    squared_distance = stats.energy_distance(x, y) ** 2
    xv, yv = np.meshgrid(x, y)
    expected_abs_diff = np.abs(xv - yv).mean()
    if expected_abs_diff == 0:
        return 0.0
    return squared_distance / (2 * expected_abs_diff)
def choose(self,a:LightCurve,b:LightCurve) -> float:
    """Return the energy distance between the ``curve`` data of two light curves."""
    # Imported here so SciPy is only required when this method is called.
    from scipy import stats

    return stats.energy_distance(a.curve, b.curve)
# Testing script for the statistical loss functions in PyTorch: the batch
# mean of SciPy's distances is printed next to the PyTorch losses so the two
# can be compared by eye.
BATCH = 16
DIM = 32

# Making data for testing: each row is used as a weight vector over the
# support points 0 .. DIM-1.
vec_1 = np.random.random((BATCH, DIM))
vec_2 = np.random.random((BATCH, DIM))
vec_list = np.arange(DIM)

# Making SciPy / NumPy reference results
result_1 = 0
result_2 = 0
for weights_1, weights_2 in zip(vec_1, vec_2):
    result_1 += stats.wasserstein_distance(vec_list, vec_list,
                                           weights_1, weights_2)
    result_2 += stats.energy_distance(vec_list, vec_list,
                                      weights_1, weights_2)
print("Numpy-Based Scipy Results: \n",
      "Wasserstein distance", result_1/BATCH, "\n",
      "Energy distance", result_2/BATCH, "\n")

# Making PyTorch variables
tensor_1 = Variable(torch.from_numpy(vec_1))
tensor_2 = Variable(torch.from_numpy(vec_2), requires_grad=True)
tensor_3 = Variable(torch.rand(BATCH+1, DIM))

# Each loss is evaluated once and both its value and its own requires_grad
# flag are reported (the energy line used to report the Wasserstein flag).
wasserstein_loss = stats_loss.torch_wasserstein_loss(tensor_1, tensor_2)
energy_loss = stats_loss.torch_energy_loss(tensor_1, tensor_2)

# Show results
print("Pytorch-Based Results:")
print("Wasserstein loss", wasserstein_loss.data,
      wasserstein_loss.requires_grad)
print("Energy loss", energy_loss.data, energy_loss.requires_grad)
def analyse_differences(training_ds, conditions, energies_inputted,
                        variables_of_interest):
    """Plot permutation-test p-values and Wasserstein distances per variable.

    For every variable and every index ``i`` below
    ``len(energies_inputted)``, the sample ``conditions[num][i]`` is compared
    with ``training_ds[i][var]``:

    * a permutation test (1000 rounds, fixed seed) on the absolute
      difference of the geometric means gives a p-value;
    * after rescaling (``x / max(y)`` and ``normalize(y)``) the Wasserstein
      and energy distances are computed.

    Two figures are saved per variable under ``./final_result/analysis/``:
    the p-values and the Wasserstein distances, both plotted against the
    index ``i``.

    Args:
        training_ds: Indexable by ``i`` and then by variable name.
        conditions: Indexable by variable position and then by ``i``.
        energies_inputted: Only its length is used (number of comparisons).
        variables_of_interest: Variable names; ``'s1'`` and ``'s2'`` get
            their own axis label, every other name is labelled as f_200.

    Returns:
        None.
    """
    plot_labels = {'s1': r'$S_1$', 's2': r'$S_2$'}

    def gmean_difference(x, y):
        """Test statistic: absolute difference of the geometric means."""
        # Other statistics could be swapped in here (variance ratio,
        # Mann-Whitney U, arithmetic-mean difference, KS or chi-square
        # statistic); the saved plots are labelled for the geometric mean.
        return np.abs(gmean(x) - gmean(y))

    indices = list(range(len(energies_inputted)))

    for num, var in enumerate(variables_of_interest):
        varString = plot_labels.get(var, r'$f_{200}$')

        p_values = []
        wasserstein_distances = []
        # NOTE(review): energy distances are computed but not plotted below.
        energy_distances = []
        for i in indices:
            x = np.array(conditions[num][i])
            y = np.array(training_ds[i][var])

            p_value = permutation_test(x,
                                       y,
                                       method='approximate',
                                       num_rounds=1000,
                                       func=gmean_difference,
                                       seed=0)

            # The distances are computed on rescaled samples.
            x = x / max(y)
            y = normalize(y)
            wasserstein_distances.append(wasserstein_distance(x, y))
            energy_distances.append(energy_distance(x, y))
            p_values.append(p_value)

        # ---- figure 1: p-values ------------------------------------------
        zero_p = [(i, p) for i, p in zip(indices, p_values) if p == 0]
        accepted = np.where(np.array(p_values) > 0.5)[0]

        fig = plt.figure()
        ax = fig.add_subplot()
        ax2 = ax.twinx()
        print("Percentage in which p > 0.5 for %s : %.5f" %
              (var, 100 * (accepted.size) / len(p_values)))
        ax.scatter(indices, p_values, marker='o', s=1.5, color='blue')
        ax.set_yscale('log')
        # p == 0 cannot be shown on the log axis, so those points are drawn
        # on a hidden linear twin axis instead.
        ax2.get_yaxis().set_visible(False)
        ax2.margins(0.01)
        ax2.scatter([i for i, _ in zero_p], [p for _, p in zero_p],
                    marker='o',
                    s=1.5,
                    color='red',
                    label='p=0\n(low statistics)')
        # Invisible far-away point stretching the twin axis range.
        ax2.scatter([0], [1000000], color='white')
        ax2.legend(loc='center left')
        ax.set_xlabel("Nuclear Recoil Energy", size=11, labelpad=5)
        ax.set_ylabel("P value", size=11, labelpad=5)
        ax.set_title("Permutation test for \n" + varString +
                     " on two-tailed geometric mean differences.")
        plt.savefig('./final_result/analysis/pval_gmean_test_' + var + '.png')
        plt.close()

        # ---- figure 2: Wasserstein distances ------------------------------
        total = len(wasserstein_distances)
        plt.scatter(indices,
                    wasserstein_distances,
                    marker='x',
                    s=1.5,
                    color='blue',
                    label='All p values (100%)',
                    alpha=0.5)
        for threshold, colour, label_format in (
                (0.1, 'green', "p>0.1 (%.2f%%)"),
                (0.5, 'red', "p>0.5 (%.2f%%)")):
            selected = [(i, dist)
                        for i, dist, p in zip(indices, wasserstein_distances,
                                              p_values)
                        if p > threshold]
            plt.scatter([i for i, _ in selected],
                        [dist for _, dist in selected],
                        marker='x',
                        s=1.5,
                        color=colour,
                        label=label_format % (100 * (len(selected) / total)))
        plt.xlabel("Nuclear Recoil Energy (keV)", size=11, labelpad=5)
        plt.ylabel("Wasserstein Distance", size=11, labelpad=5)
        plt.legend()
        plt.title("Wasserstein distance for \n" + varString +
                  " for varying recoil energies")
        plt.savefig('./final_result/analysis/wasserstein_distance_' + var +
                    '.png')
        plt.close()