def test_verbose_true(self):
    """Fit a verbose ``Rips`` on the corners of a 10x10 square and check diagram shapes."""
    square_corners = np.array(
        [
            [0.0, 0.0],
            [10.0, 0.0],
            [10.0, 10.0],
            [0.0, 10.0],
        ]
    )
    diagrams = Rips(verbose=True).fit_transform(square_corners)

    # Two diagrams are expected: four rows in the first, one row in the second.
    assert len(diagrams) == 2
    first, second = diagrams[0], diagrams[1]
    assert first.shape == (4, 2)
    assert second.shape == (1, 2)
def subj_to_pims(sbj, sdict, px, sd, nobs):
    """
    Generate persistence images for all gestures for a given subject.

    INPUTS
    sbj - subject number (stored as ``int(sbj)`` in the last column)
    sdict - dict of gestures performed by a subject; the first character of
        each key is converted with ``int`` and stored as the gesture label
    px - pixel dimension/resolution; e.g. px=20 gives 20x20 persistence image
    sd - persistence image concentration parameter (gaussian)
    nobs - number of observations per subj (total num of gests they performed)

    OUTPUTS
    array of shape (nobs, px**2 + 2); each row is a flattened persistence
    image followed by the gesture label and the subject number. Rows beyond
    ``len(sdict)`` stay zero.
    """
    # Instantiate the Vietoris-Rips and persistence-image generators once and
    # reuse them for every gesture.
    rips = Rips(maxdim=1, verbose=False)  # 1-D homology rips complex
    pim = PersImage(pixels=[px, px], spread=sd)

    # Every row holds px*px image pixels plus 2 label columns. Allocating the
    # 2-D shape directly (instead of a flat buffer reshaped with -1) also
    # works when nobs == 0, where the reshape used to raise ValueError.
    res_mat = np.zeros((nobs, px**2 + 2))

    for row, (gnum, garray) in enumerate(sdict.items()):
        # Per the original author's note, the first column is time and the
        # last column is the gesture label; both are sliced off here.
        dgms = rips.fit_transform(garray[:, 1:-1])
        img = pim.transform(dgms[1])  # persistence image of 1-cycles
        res_mat[row, :] = np.r_[img.flatten(), int(gnum[0]), int(sbj)]

    return res_mat
def computeVR(data, path_to_save):
    """Fit a default ``Rips`` on ``data``, plot the diagrams and save the figure.

    The figure is written to ``path_to_save`` at 200 dpi, then shown and
    closed.
    """
    vietoris_rips = Rips()
    persistence = vietoris_rips.fit_transform(data)
    vietoris_rips.plot(persistence)

    # Save before show(): the figure is closed right afterwards.
    plt.savefig(path_to_save, dpi=200)
    plt.show()
    plt.close()
def test_sparse(self):
    """A thresholded dense filtration and a pre-sparsified one must agree."""
    np.random.seed(10)
    thresh = 1.1

    # Dense input: the filtration is cut off by the threshold inside Rips.
    points = (
        datasets.make_circles(n_samples=100)[0]
        + 5 * datasets.make_circles(n_samples=100)[0]
    )
    dense_rips = Rips(thresh=thresh, maxdim=1)
    dense_dgms = dense_rips.fit_transform(points)

    # Sparse input: threshold first, then run the full filtration on the
    # resulting sparse distance matrix.
    sparse_rips = Rips(maxdim=1)
    sparse_dm = makeSparseDM(points, thresh)
    sparse_dgms = sparse_rips.fit_transform(sparse_dm, distance_matrix=True)

    # The same number of edges should have been added.
    assert dense_rips.num_edges_ == sparse_rips.num_edges_

    # Compare the second diagrams after ordering both by birth time.
    dense_h1 = dense_dgms[1]
    dense_h1 = dense_h1[np.argsort(dense_h1[:, 0]), :]
    sparse_h1 = sparse_dgms[1]
    sparse_h1 = sparse_h1[np.argsort(sparse_h1[:, 0]), :]
    assert np.allclose(dense_h1, sparse_h1)
def test_non_square_dist_matrix(self):
    """A 3x10 matrix passed with ``distance_matrix=True`` must raise."""
    estimator = Rips()
    not_square = np.random.random((3, 10))

    with pytest.raises(Exception):
        estimator.transform(not_square, distance_matrix=True)
def gen_pd(dir_layers, layer_no):
    """Plot the persistence diagrams of one pickled layer, raw and rotated.

    Loads ``<dir_layers>/layer_<layer_no>/all.pickle``, fits a ``Rips`` on it
    and writes two scatter plots:

    * ``images/layer_<layer_no>.png`` - (birth, death) points of all diagrams;
    * ``<landscape_dir>/layer_<layer_no>.png`` - the same points rotated by
      45 degrees. ``landscape_dir`` is read from module scope.

    The diagrams returned by ``Rips`` are rotated in place. Nothing is
    returned.
    """
    dir_layers += '/layer_' + str(layer_no)

    # NOTE(security): pickle.load can execute arbitrary code; only point this
    # at pickle files produced by trusted code.
    with open('{dir_layers}/all.pickle'.format(dir_layers=dir_layers), 'rb') as f:
        layer = pickle.load(f)
    print(layer.shape, layer[0].shape)

    # Generate Vietoris-Rips complex
    rips = Rips(verbose=False)
    diagrams = rips.fit_transform(layer)

    fig = plt.figure()
    try:
        # Save persistence diagrams as image
        for diagram in diagrams:
            plt.scatter(diagram[:, 0], diagram[:, 1])

            # Rotate entries by 45 degrees, in place. Copies of both columns
            # are taken first so the second assignment still sees the
            # original births.
            births = diagram[:, 0].copy()
            deaths = diagram[:, 1].copy()
            diagram[:, 0] = (births + deaths) / sqrt(2)
            diagram[:, 1] = (deaths - births) / sqrt(2)
        plt.title("Layer {0}".format(layer_no))
        plt.savefig("images/layer_{layer_no}.png".format(layer_no=layer_no))
        plt.clf()

        # Save rotated persistence diagrams as image
        for diagram in diagrams:
            plt.scatter(diagram[:, 0], diagram[:, 1])
        # The title used to be hard-coded to layer 5 regardless of layer_no.
        plt.title("Layer {0} Landscape".format(layer_no))
        plt.savefig("{landscape_dir}/layer_{layer_no}.png".format(
            landscape_dir=landscape_dir, layer_no=layer_no))
        plt.clf()
    finally:
        # Release the figure; otherwise every call leaves one more open.
        plt.close(fig)
    print("done")
def test_fastrips(n=100, dim=2):
    """Time a default ``Rips`` run on ``n`` uniform random points in ``dim`` dimensions.

    The diagrams are plotted with a title naming ``n`` and ``dim``, and the
    elapsed time (which includes the plotting call) is printed through the
    ``pf`` formatter.
    """
    t0 = time.time()
    rips = Rips()
    data = np.random.random((n, dim))
    diagrams = rips.fit_transform(data)
    # The title previously referenced an undefined name ``d``; the dimension
    # parameter is ``dim``.
    rips.plot(diagrams, title='n = %s, d = %s' % (n, dim))
    print('Computing rips of %s nodes at dim %s takes %s' %
          (n, dim, pf(time.time() - t0)))
def test_defaults(self):
    """A default ``Rips`` returns two diagrams and uses coefficient 2."""
    cloud = np.random.random((100, 3))
    estimator = Rips()
    diagrams = estimator.fit_transform(cloud)

    assert len(diagrams) == 2
    assert estimator.coeff == 2
def tda_features(nodes):
    """Append scaled birth/lifetime features from Rips diagrams to ``nodes``.

    ``nodes`` is wrapped in a DataFrame with columns ``feat-0 .. feat-(k-1)``
    (``k = nodes.shape[1]``) and fed to a default ``Rips``. From the first two
    diagrams four columns are derived, each min-max scaled on its own:

    * ``birth_dim0`` / ``birth_dim1`` - reciprocal of the birth values, after
      values <= 0 are replaced by 1e-7;
    * ``lifetime_dim0`` / ``lifetime_dim1`` - death minus birth, with
      non-finite entries replaced by (largest value + 1).

    The columns are attached as ``pd.Series`` (aligned on the row index);
    any resulting missing values are filled with 0. Returns ``embeds.values``.
    """
    embeds = pd.DataFrame(
        nodes,
        columns=['feat-{}'.format(col) for col in range(nodes.shape[1])],
    )
    rips = Rips()
    scaler = MinMaxScaler()

    def replace_non_finite(lifetimes, notice):
        """Overwrite non-finite lifetimes in place with (max + 1)."""
        bad = ~np.isfinite(lifetimes)
        if bad.any():
            print(notice)
            # First park the bad entries on the current minimum so that the
            # following max() is taken over the patched array ...
            lifetimes[bad] = lifetimes.min()
            # ... then move them just above the largest value.
            lifetimes[bad] = lifetimes.max() + 1.0

    def unit_scale(column):
        """Min-max scale a 1-D array; the shared scaler is refit on each call."""
        return scaler.fit_transform(column.reshape(-1, 1)).flatten()

    # Transform
    print("Generating rips barcodes... This may take a while.")
    diagrams = rips.fit_transform(embeds)
    h0, h1 = diagrams[0], diagrams[1]

    # The birth arrays are views into the diagrams; lifetimes are computed
    # before those views are modified below.
    births0 = h0[:, 0]
    births1 = h1[:, 0]
    lifetimes0 = h0[:, 1] - h0[:, 0]
    lifetimes1 = h1[:, 1] - h1[:, 0]

    replace_non_finite(lifetimes0, 'Cleaning dim0...')
    replace_non_finite(lifetimes1, 'Cleaning dim1...')

    # Avoid dividing by zero (or a negative value) when taking reciprocals.
    births0[births0 <= 0] = 1e-7
    births1[births1 <= 0] = 1e-7

    # Weight birth times
    births0 = np.reciprocal(births0)
    births1 = np.reciprocal(births1)

    # MinMax scaling
    births0 = unit_scale(births0)
    births1 = unit_scale(births1)
    lifetimes0 = unit_scale(lifetimes0)
    lifetimes1 = unit_scale(lifetimes1)

    # Concatenate tda features to embeds (column order matters for .values)
    new_columns = (
        ('birth_dim0', births0),
        ('lifetime_dim0', lifetimes0),
        ('birth_dim1', births1),
        ('lifetime_dim1', lifetimes1),
    )
    for column_name, column_values in new_columns:
        embeds[column_name] = pd.Series(data=column_values)

    embeds.fillna(0, inplace=True)
    return embeds.values
def generatePD(allTimeDelayedSeg, segmentLength, embDim, tau,
               PCA_n_components, norm, savePD):
    """Compute the second (index 1) Rips diagram of every segment.

    Parameters
    ----------
    allTimeDelayedSeg : iterable
        Inputs passed one by one to ``Rips(maxdim=1, coeff=2).fit_transform``.
    segmentLength, embDim, tau, PCA_n_components, norm
        Only used to build the output file name.
    savePD : bool
        When truthy, the diagrams are also written to
        ``PD_Len<segmentLength>_Dim<embDim>_Tau<tau>_PCA<PCA_n_components>_<norm>.npy``
        in the current working directory.

    Returns
    -------
    list
        One diagram (element ``[1]`` of the ``fit_transform`` result) per
        segment.
    """
    rips = Rips(maxdim=1, coeff=2)
    diagrams_h1 = [rips.fit_transform(data)[1] for data in allTimeDelayedSeg]

    if savePD:
        # Diagrams generally have different numbers of rows. Recent NumPy
        # refuses to build an array from such a ragged list, so fall back to
        # an explicit object array in that case. When all diagrams share one
        # shape, the saved array is the same as before.
        try:
            payload = np.asarray(diagrams_h1)
        except ValueError:
            payload = np.empty(len(diagrams_h1), dtype=object)
            for position, diagram in enumerate(diagrams_h1):
                payload[position] = diagram
        np.save(
            'PD_Len%s_Dim%s_Tau%s_PCA%s_%s.npy' %
            (segmentLength, embDim, tau, PCA_n_components, norm), payload)

    return diagrams_h1
def test_infty(self):
    """Plotting diagrams that contain an infinite death must not raise."""
    plotter = Rips()
    with_infinite_death = np.array([[0, np.inf], [1, 1], [2, 4], [3, 5]])
    all_finite = np.array([[0.5, 3], [2, 4], [4, 5], [10, 15]])

    # A fresh figure is created before plotting; the handles are not needed.
    plt.subplots()
    plotter.plot([with_infinite_death, all_finite], legend=True, show=False)
def test_thresh(self):
    """A tight threshold should give fewer H_1 bars than a loose one."""
    np.random.seed(3100)
    cloud = np.random.random((100, 3))

    tight_dgms = Rips(thresh=0.1).fit_transform(cloud)
    loose_dgms = Rips(thresh=1).fit_transform(cloud)

    # Barcode of H_1 diagram will be smaller, right?
    n_tight = len(tight_dgms[1])
    n_loose = len(loose_dgms[1])
    assert n_tight < n_loose, "Usually"
def test_coeff(self):
    """Smoke test: fitting with coeff=3 and coeff=2 must not raise."""
    np.random.seed(3100)
    cloud = np.random.random((100, 3))

    dgm3 = Rips(coeff=3).fit_transform(cloud)
    dgm2 = Rips(coeff=2).fit_transform(cloud)

    assert dgm2 is not dgm3, (
        "This is a vacuous assertion, we only care that the above operations did not throw errors"
    )
def test_input_warnings(self):
    """``transform`` warns on wide input and on square non-distance input."""
    estimator = Rips()

    # More columns than rows.
    wide = np.random.random((3, 10))
    with pytest.warns(UserWarning, match="has more columns than rows"):
        estimator.transform(wide)

    # Square input without distance_matrix=True.
    square = np.random.random((3, 3))
    with pytest.warns(
        UserWarning,
        match="input matrix is square, but the distance_matrix",
    ):
        estimator.transform(square)
def test_default_label(self):
    """Default plots label the axes 'Birth' (x) and 'Death' (y)."""
    plotter = Rips()
    first = np.array([[0, 1], [1, 1], [2, 4], [3, 5]])
    second = np.array([[0.5, 3], [2, 4], [4, 5], [10, 15]])

    _, axes = plt.subplots()
    plotter.plot([first, second], show=False)

    assert axes.get_ylabel() == 'Death'
    assert axes.get_xlabel() == 'Birth'
def test_default_square(self):
    """Both endpoints of the first plotted line have equal x and y."""
    plotter = Rips()
    first = np.array([[0, 1], [1, 1], [2, 4], [3, 5]])
    second = np.array([[0.5, 3], [2, 4], [4, 5], [10, 15]])

    _, axes = plt.subplots()
    plotter.plot([first, second], show=False)

    endpoints = axes.lines[0].get_xydata()
    start, end = endpoints[0], endpoints[1]
    assert start[0] == start[1]
    assert end[0] == end[1]
def test_legend_false(self):
    """With ``legend=False`` the axes must not contain a Legend artist."""
    plotter = Rips()
    first = np.array([[0, 1], [1, 1], [2, 4], [3, 5]])
    second = np.array([[0.5, 3], [2, 4], [4, 5], [10, 15]])

    _, axes = plt.subplots()
    plotter.plot([first, second], legend=False, show=False)

    # Identify legends by class name to avoid importing matplotlib internals.
    legends = []
    for artist in axes.get_children():
        if artist.__class__.__name__ == "Legend":
            legends.append(artist)
    assert len(legends) == 0
def test_greedyperm_circlebottleneck(self):
    """
    Test a relationship between the bottleneck distance and the covering
    radius for a simple case where computing the bottleneck distance is
    trivial
    """
    N = 40
    np.random.seed(N)
    angles = 2 * np.pi * np.random.rand(N)
    # N points on the unit circle, one point per row.
    circle = np.vstack((np.cos(angles), np.sin(angles))).T

    full = Rips(maxdim=1)
    subsampled = Rips(maxdim=1, n_perm=10)
    h1_full = full.fit_transform(circle)[1]
    h1_sub = subsampled.fit_transform(circle)[1]

    cover_radius = subsampled.r_cover_
    assert cover_radius > 0
    assert np.max(np.abs(h1_full - h1_sub)) <= 2 * subsampled.r_cover_
def test_lifetime(self):
    """With ``lifetime=True`` the y axis is 'Lifetime' and the first line is flat at 0."""
    plotter = Rips()
    first = np.array([[0, 1], [1, 1], [2, 4], [3, 5]])
    second = np.array([[0.5, 3], [2, 4], [4, 5], [10, 15]])

    _, axes = plt.subplots()
    plotter.plot([first, second], lifetime=True, show=False)

    assert axes.get_ylabel() == 'Lifetime'
    assert axes.get_xlabel() == 'Birth'

    # The first line drawn on the axes lies on y == 0.
    baseline = axes.get_lines()[0]
    np.testing.assert_array_equal(baseline.get_ydata(), [0, 0])
def plot_vr_complex(path: str,
                    delimiter: str = ",",
                    thresh: float = 1.0,
                    maxdim: int = 3,
                    coeff=3,
                    barcode: bool = True) -> np.ndarray:
    """
    Plots the persistence diagrams of a Vietoris Rips complex and returns them.

    :param path: Path to the desired csv file.
    :param delimiter: Delimiter for the csv file.
    :param thresh: Accepted but not used by this function.
    :param maxdim: Forwarded to ``Rips(maxdim=...)``.
    :param coeff: Forwarded to ``Rips(coeff=...)``.
    :param barcode: Accepted but not used by this function.
    :return: The value returned by ``Rips.fit_transform`` for the loaded data.
    """
    vietoris_rips = Rips(maxdim=maxdim, coeff=coeff, do_cocycles=True)

    # The file content is treated as a point cloud, not a distance matrix.
    diagrams = vietoris_rips.fit_transform(
        genfromtxt(path, delimiter=delimiter),
        distance_matrix=False,
    )
    vietoris_rips.plot(diagrams)
    return diagrams
def Fermat_dgm(data, p, rescaled=False, d=None, mu=None, title=None):
    '''
    Computes the persistence diagram using Fermat distance.

    data:     input passed to ``compute_fermat_distance``; ``len(data)`` is
              used as the sample count when rescaling.
    p:        Fermat distance parameter, passed to ``compute_fermat_distance``.
    rescaled: when True the distance matrix is multiplied by
              ``len(data) ** ((p - 1) / d)`` and divided by ``mu``.
    d, mu:    rescaling constants; both are required when ``rescaled`` is True.
    title:    plot title; when None a default title naming ``p`` is used.

    Returns the diagrams from ``Rips.fit_transform``. A new figure with the
    lifetime plot is created as a side effect.

    Raises TypeError when ``rescaled`` is True but ``d`` or ``mu`` is None.
    '''
    # Fail before the distance computation instead of after it. The exception
    # type matches what the arithmetic below would raise on a None operand.
    if rescaled and (d is None or mu is None):
        raise TypeError("rescaled=True requires both 'd' and 'mu' to be given")

    distance_matrix = compute_fermat_distance(data, p)
    if rescaled:
        distance_matrix = (distance_matrix * len(data)**((p - 1) / d)) / mu

    rips = Rips()
    dgms = rips.fit_transform(distance_matrix, distance_matrix=True)

    # Draw on a fresh figure so an existing one is not overwritten.
    plt.figure()
    rips.plot(dgms, lifetime=True)
    if title is None:
        plt.title('Fermat distance with p = %s' % (p,))
    else:
        plt.title(title)
    return dgms
def generate_diagrams(dn, block_list, names=[], c=0):
    """
    Given layers of batch data, generate their Vietoris-Rips persistence
    diagrams and pickle them.

    :param dn: output directory; it and ``<dn>/pickle_data`` are created if
        missing.
    :param block_list: iterable of inputs, each passed to
        ``Rips(verbose=False).fit_transform``.
    :param names: one name per block; ``/`` is replaced by ``_`` when
        building labels. It must provide an entry for every block (indexing
        raises ``IndexError`` otherwise). The default list is only read,
        never mutated.
    :param c: starting value of the running counter used as label prefix.
    :return: list of ``<dn>/<c>_<name>.png`` paths, one per block. This
        function does not write those image files; it only writes
        ``<dn>/pickle_data/<c>_<name>.pickle``.
    """
    directory = dn
    os.makedirs(directory, exist_ok=True)  # succeeds even if directory exists.
    os.makedirs(directory + "/pickle_data", exist_ok=True)

    # All diagrams are computed before anything is written. The former
    # running max of birth/death values was never used and raised ValueError
    # for a diagram whose deaths are all infinite, so it has been removed.
    diagrams_list = [
        Rips(verbose=False).fit_transform(block) for block in block_list
    ]

    images = []
    for j, diagrams in enumerate(diagrams_list):
        label = str(c) + "_" + names[j].replace("/", "_")
        c += 1
        with open(
                "{directory}/pickle_data/{label}.pickle".format(
                    directory=directory, label=label), 'wb') as f:
            pickle.dump(diagrams, f)
        images.append('{0}/{1}'.format(directory, label) + ".png")
    return images
def test_maxdim(self):
    """``maxdim=k`` must yield ``k + 1`` diagrams for k in 0, 1, 2."""
    cloud = np.random.random((100, 3))

    # maxdim refers to the max H_p class, generate all less than
    for top_dim in (0, 1, 2):
        diagrams = Rips(maxdim=top_dim).fit_transform(cloud)
        assert len(diagrams) == top_dim + 1
def test_multiple(self):
    """Two diagrams produce two scatter collections holding the input points."""
    plotter = Rips()
    first = np.array([[0, 1], [1, 1], [2, 4], [3, 5]])
    second = np.array([[0.5, 3], [2, 4], [4, 5], [10, 15]])
    diagrams = [first, second]

    _, axes = plt.subplots()
    plotter.plot(diagrams, show=False)

    # Scatter plots show up on the axes as PathCollection children.
    scatters = []
    for artist in axes.get_children():
        if artist.__class__.__name__ == "PathCollection":
            scatters.append(artist)

    assert len(scatters) == 2
    np.testing.assert_array_equal(scatters[0].get_offsets(), diagrams[0])
    np.testing.assert_array_equal(scatters[1].get_offsets(), diagrams[1])
def test_single(self):
    """Mostly just test that plotting a single diagram doesn't crash."""
    plotter = Rips()
    diagram = np.array([[0, 1], [1, 1], [2, 4], [3, 5]])

    _, axes = plt.subplots()
    plotter.plot(diagram, show=False)

    # The first line on the axes must span the full value range of the data.
    line_x, _ = axes.lines[0].get_xydata().T
    assert line_x[0] <= np.min(diagram)
    assert line_x[1] >= np.max(diagram)

    # get PathCollection
    scatters = []
    for artist in axes.get_children():
        if artist.__class__.__name__ == "PathCollection":
            scatters.append(artist)
    assert len(scatters) == 1
def test_sparse(self):
    """Thresholding inside Rips and pre-sparsifying the input must match."""
    np.random.seed(10)
    thresh = 1.1

    def ordered_by_birth(diagram):
        """Return the rows of ``diagram`` sorted by their first column."""
        order = np.argsort(diagram[:, 0])
        return diagram[order, :]

    # Do dense filtration with threshold
    small_circles = datasets.make_circles(n_samples=100)[0]
    data = small_circles + 5 * datasets.make_circles(n_samples=100)[0]
    rips_dense = Rips(thresh=thresh, maxdim=1)
    dgms_dense = rips_dense.fit_transform(data)

    # Convert to sparse matrix first based on threshold,
    # then do full filtration
    rips_sparse = Rips(maxdim=1)
    dgms_sparse = rips_sparse.fit_transform(
        makeSparseDM(data, thresh), distance_matrix=True
    )

    # The same number of edges should have been added
    assert rips_dense.num_edges_ == rips_sparse.num_edges_

    assert np.allclose(
        ordered_by_birth(dgms_dense[1]),
        ordered_by_birth(dgms_sparse[1]),
    )
def test_lifetime_removes_birth(self):
    """With ``lifetime=True`` the plotted y values are death minus birth."""
    plotter = Rips()
    diagrams = [
        np.array([[0, 1], [1, 1], [2, 4], [3, 5]]),
        np.array([[0.5, 3], [2, 4], [4, 5], [10, 15]]),
    ]

    _, axes = plt.subplots()
    plotter.plot(diagrams, lifetime=True, show=False)

    scatters = []
    for artist in axes.get_children():
        if artist.__class__.__name__ == "PathCollection":
            scatters.append(artist)

    # Build the expected points by converting the input arrays in place
    # (after plotting) from (birth, death) to (birth, death - birth).
    expected_first = diagrams[0]
    expected_first[:, 1] = expected_first[:, 1] - expected_first[:, 0]
    expected_second = diagrams[1]
    expected_second[:, 1] = expected_second[:, 1] - expected_second[:, 0]

    assert len(scatters) == 2
    np.testing.assert_array_equal(scatters[0].get_offsets(), expected_first)
    np.testing.assert_array_equal(scatters[1].get_offsets(), expected_second)
def PersDiagram(xyz, lifetime=True):
    '''
    Plot the persistence diagram of the distance matrix built from ``xyz``.

    xyz:      passed unchanged to ``Makexyzdistance``, whose first return
              value is used as a distance matrix.
    lifetime: forwarded to ``Rips.plot``.

    The plot is drawn with ``show=False`` using "Times New Roman" and the
    labels 'Connected Components' and 'Holes'. Nothing is returned.
    '''
    plt.rcParams["font.family"] = "Times New Roman"
    D, _ = Makexyzdistance(xyz)

    # The diagrams are computed once, by the Rips object. A separate
    # ``ripser(D, ...)`` call whose result was never used has been removed.
    rips = Rips()
    rips.transform(D, distance_matrix=True)

    # Drop the last row of the first diagram before plotting.
    # NOTE(review): presumably this is the single infinite-death component -
    # confirm.
    rips.dgms_[0] = rips.dgms_[0][0:-1]
    rips.plot(show=False, lifetime=lifetime,
              labels=['Connected Components', 'Holes'])

    L = plt.legend()
    plt.setp(L.texts, family="Times New Roman")
    # Re-applied after plotting.
    # NOTE(review): presumably because plotting may reset rcParams - confirm.
    plt.rcParams["font.family"] = "Times New Roman"
def test_coeff(self):
    """Smoke test: both coeff=3 and coeff=2 run without raising."""
    cloud = np.random.random((100, 3))

    by_coeff = {}
    for coeff in (3, 2):
        by_coeff[coeff] = Rips(coeff=coeff).fit_transform(cloud)

    dgm2, dgm3 = by_coeff[2], by_coeff[3]
    assert dgm2 is not dgm3, "This is a vacuous assertion, we only care that the above operations did not throw errors"
def test_thresh(self):
    """A tight threshold should give fewer H_1 bars than a loose one.

    The comparison depends on the sampled point cloud, so the random
    generator is seeded to make the test deterministic.
    """
    np.random.seed(3100)
    data = np.random.random((100, 3))

    rips0 = Rips(thresh=0.1)
    dgm0 = rips0.fit_transform(data)

    rips1 = Rips(thresh=1)
    dgm1 = rips1.fit_transform(data)

    # Barcode of H_1 diagram will be smaller, right?
    assert len(dgm0[1]) < len(dgm1[1]), "Usually"
def test_input_warnings(self):
    """Check the two ``UserWarning`` messages raised for suspicious input shapes."""
    rips = Rips()

    expectations = (
        ((3, 10), "has more columns than rows"),
        ((3, 3), "input matrix is square, but the distance_matrix"),
    )
    for shape, message in expectations:
        data = np.random.random(shape)
        with pytest.warns(UserWarning, match=message):
            rips.transform(data)
def PersDiagram(xyz, lifetime=True, showplot=True):
    '''
    Creates a visual representation for a persistence diagram

    Parameters
    ----------
    xyz: string
        - Name for local file containing data on coordinates representing
          atoms in compound; passed unchanged to ``Makexyzdistance``
    lifetime: bool, optional
        - Option to set the y-axis to lifetime value
        - Options:
            - ``True``: set coordinates to (birth, death - birth)
            - ``False``: set coordinates to (birth, death)
    showplot: bool, optional
        - Option to output PD plot automatically to screen or not
        - Options:
            - ``True``: show plot
            - ``False``: do not show plot

    Returns
    -------
    rips: `Rips` object from the ripser module
        - See `ripser documentation <https://ripser.scikit-tda.org/reference/stubs/ripser.Rips.html#>`_
          for this return value.
        - This object has the data specified in `xyz` fit to it; the last
          row of its first diagram (``dgms_[0]``) has been removed.

    .. note:: If ``showplot = True``, then a plot of the PD will be output to
       the screen.
    '''
    plt.rcParams["font.family"] = "Times New Roman"
    D, _ = Makexyzdistance(xyz)

    # Perform plotting with Rips() object. The diagrams are computed once
    # here; a separate ``ripser(D, ...)`` call whose result was never used
    # has been removed.
    rips = Rips()
    rips.transform(D, distance_matrix=True)

    # Drop the last row of the first diagram before plotting.
    # NOTE(review): presumably this is the single infinite-death component -
    # confirm.
    rips.dgms_[0] = rips.dgms_[0][0:-1]
    rips.plot(show=showplot, lifetime=lifetime,
              labels=['Connected Components', 'Holes'])

    L = plt.legend()
    plt.setp(L.texts, family="Times New Roman")
    # Re-applied after plotting.
    # NOTE(review): presumably because plotting may reset rcParams - confirm.
    plt.rcParams["font.family"] = "Times New Roman"

    # Return the Rips object fitted with our data.
    return rips
def test_set_title(self):
    """An explicit title is applied; without one the axes title is empty."""
    plotter = Rips()
    diagrams = [
        np.array([[0, 1], [1, 1], [2, 4], [3, 5]]),
        np.array([[0.5, 3], [2, 4], [4, 5], [10, 15]]),
    ]

    cases = (
        ({'title': 'my title'}, 'my title'),
        ({}, ''),
    )
    for plot_options, expected_title in cases:
        # Each case gets its own fresh figure.
        _, axes = plt.subplots()
        plotter.plot(diagrams, show=False, **plot_options)
        assert axes.get_title() == expected_title
def test_maxdim(self):
    """The number of returned diagrams is ``maxdim + 1`` (seeded data)."""
    np.random.seed(3100)
    cloud = np.random.random((100, 3))

    # maxdim refers to the max H_p class, generate all less than
    expected_counts = ((0, 1), (1, 2), (2, 3))
    for top_dim, n_diagrams in expected_counts:
        estimator = Rips(maxdim=top_dim)
        result = estimator.fit_transform(cloud)
        assert len(result) == n_diagrams
def content_topology(df):
    """Compute one landscape norm per entry of ``df['content']``.

    For each text:

    1. split it into sentences on 。 ! ！ . ? ？, keeping each terminator
       attached to its sentence (text after the last terminator is dropped);
    2. extract keywords per sentence with ``jieba.analyse.extract_tags`` and
       build a TF-IDF matrix with one row per sentence;
    3. fit a default ``Rips`` on that matrix, drop the last row of the first
       diagram and stack the first two diagrams;
    4. fit a ``Landscape`` with as many landscapes as stacked points, cut its
       output into consecutive chunks of 100 samples and pass them to
       ``utils.lambda_1_norm``.

    If the TF-IDF step or the landscape step raises, 0 is recorded for that
    text. Returns the list of recorded values, in input order.
    """
    # The capturing group makes re.split return the terminators as well
    # (at the odd positions), so they can be re-attached below.
    sentence_splitter = re.compile(r'(。|!|！|\.|\?|？)')
    # Chunk length used when cutting the landscape output.
    # NOTE(review): assumes the Landscape model samples each landscape at
    # 100 points - confirm against its resolution setting.
    samples_per_landscape = 100

    norm_list = []
    for content in df['content']:
        pieces = sentence_splitter.split(content)
        # Pair each text fragment with the terminator following it.
        new_sents = [
            text + terminator
            for text, terminator in zip(pieces[0::2], pieces[1::2])
        ]

        # Keywords are extracted from each sentence; the whole content used
        # to be passed here, which made every row identical.
        use_list = [
            ' '.join(jieba.analyse.extract_tags(sent)) for sent in new_sents
        ]
        docs = np.array(use_list)

        count = CountVectorizer()
        tfidf = TfidfTransformer()
        try:
            result = tfidf.fit_transform(count.fit_transform(docs)).toarray()
        except Exception:
            # Best effort: texts that cannot be vectorised score 0.
            norm_list.append(0)
            continue

        result = Rips().fit_transform(result)
        result[0] = np.delete(result[0], -1, 0)
        result = np.concatenate((result[0], result[1]), axis=0)

        landscape_model = Landscape(num_landscapes=len(result))
        try:
            landscape = landscape_model.fit_transform([result])
        except Exception:
            norm_list.append(0)
            continue

        flat = landscape[0]
        length = int(len(flat) / samples_per_landscape)
        # Consecutive, non-overlapping chunks; the start index previously
        # advanced by 1 instead of by the chunk length.
        Y_values = [
            flat[i * samples_per_landscape:(i + 1) * samples_per_landscape]
            for i in range(length)
        ]
        norm_list.append(utils.lambda_1_norm(Y_values, [result]))
    return norm_list