def distance_matrices_ext(samples, metric, metric_params=None, ordn=2):
    """Plot H0/H1 pairwise distance matrices and their combined norms.

    Computes per-dimension (H0, H1) pairwise distances between the
    diagrams in ``samples``, then shows four heatmaps: H0 alone, H1
    alone, the ``ordn``-norm combination without rescaling, and the same
    combination after rescaling H1 to H0's dynamic range.

    Parameters
    ----------
    samples : ndarray
        Collection of persistence diagrams accepted by PairwiseDistance.
    metric : str
        Diagram metric forwarded to PairwiseDistance.
    metric_params : dict or None, optional
        Extra parameters for the chosen metric.
    ordn : int, optional
        Order of the norm used to combine the H0 and H1 matrices
        (default 2).
    """
    pdist = PairwiseDistance(metric=metric, metric_params=metric_params,
                             order=None, n_jobs=-1)
    D = pdist.fit_transform(samples)

    # Rescale H1 to H0's dynamic range so neither homology dimension
    # dominates the combined norm.
    h0_range = D[:, :, 0].max() - D[:, :, 0].min()
    h1_range = D[:, :, 1].max() - D[:, :, 1].min()
    # BUG FIX: guard against an all-constant H1 slice, which previously
    # produced an inf/NaN scale via division by zero.
    scale = h0_range / h1_range if h1_range != 0 else 1.0
    Dscaled = np.stack((D[:, :, 0], D[:, :, 1] * scale), axis=2)
    Dn = np.linalg.norm(D, ord=ordn, axis=2)
    Dscaledn = np.linalg.norm(Dscaled, ord=ordn, axis=2)

    plt.figure(figsize=(8, 8))
    # Plot H0 and H1 distances separately.
    plt.subplot(221)
    plt.imshow(D[:, :, 0])
    plt.axis('off')
    plt.colorbar(shrink=0.8)
    plt.title('H0')
    plt.subplot(222)
    plt.imshow(D[:, :, 1])
    plt.axis('off')
    plt.colorbar(shrink=0.8)
    plt.title('H1')
    # Plot combined distance (with and without scaling).
    plt.subplot(223)
    plt.imshow(Dn)
    plt.axis('off')
    plt.colorbar(shrink=0.8)
    plt.title('Not scaled, norm=' + str(ordn) + '-norm')
    plt.subplot(224)
    plt.imshow(Dscaledn)
    plt.axis('off')
    plt.colorbar(shrink=0.8)
    plt.title('Scaled, norm=' + str(ordn) + '-norm')
    plt.suptitle(metric)
def test_not_fitted():
    """Calling transform() before fit() must raise NotFittedError."""
    for transformer in (PairwiseDistance(), Amplitude()):
        with pytest.raises(NotFittedError):
            transformer.transform(X_1)
def compute_distance_matrix(
    diagrams,
    metric,
    metric_params,
    plot_distance_matrix=False,
    title=None,
    file_prefix=None,
):
    """Compute the pairwise distance matrix between persistence diagrams.

    NOTE(review): ``plot_distance_matrix``, ``title`` and ``file_prefix``
    are accepted but never read in this body — kept only for caller
    compatibility; confirm whether plotting was meant to happen here.
    """
    pairwise = PairwiseDistance(
        metric=metric, metric_params=metric_params, order=None, n_jobs=N_JOBS
    )
    return pairwise.fit_transform(diagrams)
def distance_matrices(samples, metric, metric_params=None, ordn=2):
    """Plot combined H0/H1 pairwise distance matrices, raw and rescaled.

    Computes per-dimension (H0, H1) pairwise distances between the
    diagrams in ``samples`` and shows two heatmaps of the ``ordn``-norm
    combination: one as-is and one with H1 rescaled to H0's range.

    Parameters
    ----------
    samples : ndarray
        Collection of persistence diagrams accepted by PairwiseDistance.
    metric : str
        Diagram metric forwarded to PairwiseDistance.
    metric_params : dict or None, optional
        Extra parameters for the chosen metric.
    ordn : int, optional
        Order of the norm used to combine the H0 and H1 matrices
        (default 2).
    """
    pdist = PairwiseDistance(metric=metric, metric_params=metric_params,
                             order=None, n_jobs=2)
    D = pdist.fit_transform(samples)

    # Rescale H1 to H0's dynamic range so neither homology dimension
    # dominates the combined norm.
    h0_range = D[:, :, 0].max() - D[:, :, 0].min()
    h1_range = D[:, :, 1].max() - D[:, :, 1].min()
    # BUG FIX: guard against an all-constant H1 slice, which previously
    # produced an inf/NaN scale via division by zero.
    scale = h0_range / h1_range if h1_range != 0 else 1.0
    Dscaled = np.stack((D[:, :, 0], D[:, :, 1] * scale), axis=2)
    Dn = np.linalg.norm(D, ord=ordn, axis=2)
    Dscaledn = np.linalg.norm(Dscaled, ord=ordn, axis=2)

    plt.figure(figsize=(10, 5))
    plt.subplot(121)
    plt.imshow(Dn)
    plt.axis('off')
    plt.colorbar()
    plt.title('Not scaled, norm=' + str(ordn) + '-norm')
    plt.subplot(122)
    plt.imshow(Dscaledn)
    plt.axis('off')
    plt.colorbar()
    plt.title('Scaled, norm=' + str(ordn) + '-norm')
    plt.suptitle(metric)
def test_dd_transform(metric, metric_params, order, n_jobs):
    """Output-shape checks for PairwiseDistance in three configurations."""
    kwargs = dict(metric=metric, metric_params=metric_params,
                  order=order, n_jobs=n_jobs)

    # Case 1: transform data identical to fit data.
    result = PairwiseDistance(**kwargs).fit_transform(X1)
    assert result.shape[:2] == (X1.shape[0], X1.shape[0])
    if order is None:
        assert result.shape[2] == n_homology_dimensions

    # Case 2: transform data different from fit data.
    result = PairwiseDistance(**kwargs).fit(X1).transform(X2)
    assert result.shape[:2] == (X2.shape[0], X1.shape[0])
    if order is None:
        assert result.shape[2] == n_homology_dimensions

    # Case 3: as case 2, but relying on the default metric_params.
    result = PairwiseDistance(metric=metric, order=order,
                              n_jobs=n_jobs).fit(X1).transform(X2)
    assert result.shape[:2] == (X2.shape[0], X1.shape[0])
    if order is None:
        assert result.shape[2] == n_homology_dimensions
def test_dd_transform(metric, metric_params, order, n_jobs):
    """Output-shape checks for PairwiseDistance (fit==transform and fit!=transform)."""
    params = dict(metric=metric, metric_params=metric_params,
                  order=order, n_jobs=n_jobs)

    # Fit and transform on the same collection of diagrams.
    out = PairwiseDistance(**params).fit_transform(X_1)
    assert out.shape[:2] == (X_1.shape[0], X_1.shape[0])

    # Fit on X_1, transform on X_2.
    out = PairwiseDistance(**params).fit(X_1).transform(X_2)
    assert out.shape[:2] == (X_1.shape[0], X_2.shape[0])
    if order is None:
        # One distance slice per homology dimension present in X_2.
        assert out.shape[2] == len(np.unique(X_2[:, :, 2]))

    # Same split, but relying on the default metric_params.
    estimator = PairwiseDistance(metric=metric, order=order, n_jobs=n_jobs)
    out = estimator.fit(X_1).transform(X_2)
    assert out.shape[:2] == (X_1.shape[0], X_2.shape[0])
def get_pairwise_distance_metrics():
    """Return a name -> configured ``PairwiseDistance`` mapping.

    Every supported diagram metric gets a default-configured estimator,
    except ``"wasserstein"`` which is pinned to ``p=1``.
    """
    configs = [
        ("bottleneck", None),
        ("wasserstein", {"p": 1}),
        ("betti", None),
        ("landscape", None),
        ("silhouette", None),
        ("heat", None),
        ("persistence_image", None),
    ]
    return {
        name: PairwiseDistance(metric=name, metric_params=params)
        for name, params in configs
    }
def main():
    """Compute per-patient temporal-evolution distance matrices.

    For every patient in the collected-diagnoses file, loads all
    available MRI patches, computes cubical persistence diagrams once,
    then saves one pairwise distance matrix per requested diagram
    metric under GEN_DATA_DIR/temporal_evolution/.
    """
    path_to_diags = "../data/collected_diagnoses_complete.json"
    with open(path_to_diags) as f:
        diagnoses = json.load(f)
    patients = list(diagnoses.keys())

    # Sort diagnoses by patient key for deterministic iteration.
    diagnoses = collections.OrderedDict(sorted(diagnoses.items()))

    # Where the data comes from.
    data_dir = DOTENV_KEY2VAL["DATA_DIR"] + "/patch_91/"
    # Where the resulting distance matrices are saved.
    distance_dir = "/temporal_evolution/"
    utils.make_dir(DOTENV_KEY2VAL["GEN_DATA_DIR"] + distance_dir)

    # Uncomment entries to evaluate additional diagram metrics.
    distances_to_evaluate = [
        # "bottleneck",
        # "wasserstein",
        # "betti",
        "landscape",
        # "silhouette",
        # "heat",
        "persistence_image",
    ]

    for patient in tqdm(patients, total=len(patients)):
        # Load every available session patch for this patient; missing
        # files are expected (not every session exists) and skipped.
        patches = []
        for mri in diagnoses[patient]:
            patch_path = data_dir + patient + mri.replace("ses", "") + "-MNI.npy"
            try:
                patches.append(np.load(patch_path))
            except FileNotFoundError:
                pass
        if not patches:
            # BUG FIX: np.stack([]) raises ValueError; skip patients
            # with no patch files on disk instead of crashing the run.
            continue
        # Stacking enables multiprocessing.
        patches = np.stack(patches)

        # The persistence diagrams do not depend on the distance metric,
        # so compute them once per patient (hoisted out of the metric
        # loop, which previously recomputed them per metric).
        cp = CubicalPersistence(
            homology_dimensions=HOMOLOGY_DIMENSIONS,
            coeff=2,
            periodic_dimensions=None,
            infinity_values=None,
            reduced_homology=True,
            n_jobs=-1,
        )
        diagrams_cubical_persistence = cp.fit_transform(patches)

        for distance in distances_to_evaluate:
            pl_dist = PairwiseDistance(
                metric=distance,
                metric_params=None,
                order=None,
                n_jobs=-1,
            )
            X_distance = pl_dist.fit_transform(diagrams_cubical_persistence)
            with open(
                DOTENV_KEY2VAL["GEN_DATA_DIR"]
                + distance_dir
                + f"patient_evolution_distance_data_patient_{patient}"
                f"_{distance}.npy",
                "wb",
            ) as f:
                np.save(f, X_distance)
def compute_all_distances(X, metric, n_jobs=-1):
    """Compute the full pairwise-distance matrix over the samples in ``X``.

    Parameters
    ----------
    X : array-like
        Point cloud (for ``"euclidean"``) or persistence diagrams (for
        any diagram metric handled by ``PairwiseDistance``).
    metric : str
        ``"euclidean"`` uses scipy's ``pdist``; any other value is
        forwarded to ``PairwiseDistance``.
    n_jobs : int, optional
        Parallelism for ``PairwiseDistance`` (default -1, all cores).

    Returns
    -------
    numpy.ndarray
        Square symmetric distance matrix.
    """
    if metric == "euclidean":
        # Condensed distance vector -> square symmetric matrix.
        return squareform(pdist(X, metric))
    # BUG FIX: the original referenced ``self.n_jobs`` inside a plain
    # function (NameError at runtime); expose it as a backward-compatible
    # keyword parameter instead.
    return PairwiseDistance(metric=metric, n_jobs=n_jobs).fit_transform(X)
def compute_distance(emb2diagram, metric: str = "bottleneck"):
    """Fit a ``PairwiseDistance`` on the ``"SBert"`` diagrams and run it
    over the ``"LF"`` diagrams, returning the fitted estimator.

    NOTE(review): the value returned by ``transform`` is discarded and
    only the estimator is returned — confirm this is intentional.
    """
    estimator = PairwiseDistance(metric=metric)
    estimator.fit(emb2diagram["SBert"])
    # Transform result intentionally not captured, matching the
    # original behavior.
    estimator.transform(emb2diagram["LF"])
    return estimator
[0, 1, 0.], [0, 0, 1.]], # Expected bottleneck ampl: [1, 0] [[3, 3.5, 0.], [0, 0, 0.], [5, 9, 1.]] # Expected bottleneck ampl: [1/4, 2] ]) X_bottleneck_res_exp = np.array([ [1/2, 2], [1, 0], [1/4, 2] ]) @pytest.mark.parametrize('transformer', [PairwiseDistance(), Amplitude()]) def test_not_fitted(transformer): with pytest.raises(NotFittedError): transformer.transform(X1) parameters_distance = [ ('bottleneck', None), ('wasserstein', {'p': 2, 'delta': 0.1}), ('betti', {'p': 2.1, 'n_bins': 10}), ('landscape', {'p': 2.1, 'n_bins': 10, 'n_layers': 2}), ('silhouette', {'p': 2.1, 'power': 1.2, 'n_bins': 10}), ('heat', {'p': 2.1, 'sigma': 0.5, 'n_bins': 10}), ('persistence_image', {'p': 2.1, 'sigma': 0.5, 'n_bins': 10}), ('persistence_image',
def main():
    """Compute landscape-distance time series for hand-picked patients.

    For each listed patient, loads the available MRI patches, computes
    cubical persistence diagrams, and saves the pairwise landscape
    distance matrix under GEN_DATA_DIR/temporal_evolution/.
    """
    path_to_diags = "../data/collected_diagnoses_complete.json"
    patients = ["sub-ADNI005S0223"]
    progr = ["cn_mci_ad"]
    with open(path_to_diags) as f:
        diagnoses = json.load(f)

    # Sort diagnoses by key for deterministic iteration.
    diagnoses = collections.OrderedDict(sorted(diagnoses.items()))

    # Input patch location.
    data_dir = DOTENV_KEY2VAL["DATA_DIR"] + "/patch_91/"
    # Where the figures are saved.
    temporal_progression_dir = "/temporal_evolution/"
    utils.make_dir(
        DOTENV_KEY2VAL["GEN_FIGURES_DIR"] + temporal_progression_dir
    )
    # Where the resulting distance matrices are saved.
    time_series_dir = "/temporal_evolution/"
    utils.make_dir(DOTENV_KEY2VAL["GEN_DATA_DIR"] + temporal_progression_dir)

    # If we want to process multiple patients, we just throw them in a loop.
    for i, patient in enumerate(patients):
        print(
            f"Processing longitudinal data for {patient}"
            f" with progression pattern {progr[i]}"
        )

        patches = []
        for mri in diagnoses[patient]:
            patch_path = (
                data_dir + patient + mri.replace("ses", "") + "-MNI.npy"
            )
            try:
                patches.append(np.load(patch_path))
            except FileNotFoundError:
                print(patch_path + " not found")
        # Stacking enables multiprocessing.
        patches = np.stack(patches)

        cp = CubicalPersistence(
            homology_dimensions=HOMOLOGY_DIMENSIONS,
            coeff=2,
            periodic_dimensions=None,
            infinity_values=None,
            reduced_homology=True,
            n_jobs=-1,
        )
        diagrams_cubical_persistence = cp.fit_transform(patches)

        pl_dist = PairwiseDistance(
            metric="landscape", metric_params=None, order=None, n_jobs=-1
        )
        X_distance = pl_dist.fit_transform(diagrams_cubical_persistence)

        with open(
            DOTENV_KEY2VAL["GEN_DATA_DIR"]
            + time_series_dir
            + f"distance_data_patient_{patient}_{progr[i]}_landscape.npy",
            "wb",
        ) as f:
            np.save(f, X_distance)