def persistence_diagram(boundary_matrix_csc, dimensions, filtration):
    """
    Compute persistence diagram from a sparse matrix

    :param boundary_matrix_csc: Sparse matrix
    :param dimensions: Cell dimensions
    :param filtration: Filtration
    :returns: Persistence diagrams
    """
    sort_order = np.lexsort(
        (dimensions, filtration))  # Last key has higher sort priority
    boundary_matrix_csc = boundary_matrix_csc[sort_order, :][:, sort_order]
    dimensions = dimensions[sort_order]
    filtration = filtration[sort_order]

    col_count = boundary_matrix_csc.shape[1]
    assert len(dimensions) == col_count
    columns = [
        boundary_matrix_csc.getcol(col).indices.tolist()
        for col in range(col_count)
    ]

    # Many representations (vector_vector, full_pivot_column, bit_tree_pivot_column)
    # of PHAT seem to work incorrectly. vector_heap is ok.
    bdry = phat.boundary_matrix(
        representation=phat.representations.vector_heap,
        columns=list(zip(dimensions, columns)))
    pairs = bdry.compute_persistence_pairs(
        reduction=phat.reductions.twist_reduction)
    dgms = pairs_to_diagram(pairs, dimensions, filtration)
    return dgms
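
# A minimal usage sketch for persistence_diagram above. The 5x5 CSC matrix,
# the dimension/filtration arrays, and the availability of the
# pairs_to_diagram helper in scope are assumptions for illustration:
# column j holds the boundary of cell j, so here two edges (columns 3, 4)
# are attached to three vertices (columns 0-2).
import numpy as np
import scipy.sparse

rows = [0, 1, 1, 2]  # edge (0,1) in column 3, edge (1,2) in column 4
cols = [3, 3, 4, 4]
vals = [1, 1, 1, 1]
bdry_csc = scipy.sparse.csc_matrix((vals, (rows, cols)), shape=(5, 5))

dims = np.array([0, 0, 0, 1, 1])       # cell dimensions
filt = np.array([0., 0., 0., 1., 2.])  # filtration value of each cell

dgms = persistence_diagram(bdry_csc, dims, filt)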
def _compute_persistence_pairs(self, boundary_matrix=None):
    # Fall back to the stored matrix only when none is supplied; an
    # explicit None check avoids silently discarding an empty matrix.
    if boundary_matrix is None:
        boundary_matrix = self._boundary_matrix
    self._reduced_boundary_matrix = phat.boundary_matrix(
        columns=boundary_matrix,
        representation=phat.representations.sparse_pivot_column,
    )
    pairs = self._reduced_boundary_matrix.compute_persistence_pairs()
    pairs.sort()
    self._pairs = list(pairs)
    return self._pairs
def phat_diagrams(simplices, show_inf=False, verbose=True):
    """
    Compute persistence diagrams for a custom filtration by wrapping PHAT.

    Parameters
    -----------
    simplices: list of tuples
        Simplices and their filtration values: each element is a tuple
        ([idx1, ..., idxk], dist), where [idx1, ..., idxk] is the list of
        vertices in the simplex and dist is the distance (filtration value)
        at which the simplex is added.
    show_inf: Boolean
        Whether or not to return points that never die.

    Returns
    --------
    dgms: list of numpy arrays
        Persistence diagrams, where dgms[k] is the persistence diagram
        for Hk.
    """
    ## Convert simplices representation to sparse pivot column
    # -- sort by birth time, breaking ties by simplex dimension
    ordered_simplices = sorted(simplices, key=lambda x: (x[1], len(x[0])))
    columns = _simplices_to_sparse_pivot_column(ordered_simplices, verbose)

    ## Set up boundary matrix and reduce
    if verbose:
        print("Computing persistence pairs...")
    tic = time.time()
    boundary_matrix = phat.boundary_matrix(
        columns=columns,
        representation=phat.representations.sparse_pivot_column)
    pairs = boundary_matrix.compute_persistence_pairs()
    pairs.sort()
    if verbose:
        print("Finished computing persistence pairs (Elapsed Time %.3g)"
              % (time.time() - tic))

    ## Set up persistence diagrams by reading off distances
    dgms = _process_distances(pairs, ordered_simplices)

    ## Add all unpaired simplices as infinite points
    if show_inf:
        dgms = _add_unpaired(dgms, pairs, simplices)

    ## Convert to arrays
    dgms = [np.array(dgm) for dgm in dgms.values()]
    return dgms
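
# A small example of the simplices format phat_diagrams expects, following
# the docstring above: three vertices entering at time 0, then the edges
# and the triangle of a filtered triangle entering later. The specific
# filtration values are illustrative.
simplices = [
    ([0], 0.0), ([1], 0.0), ([2], 0.0),            # vertices
    ([0, 1], 1.0), ([1, 2], 1.0), ([0, 2], 2.0),   # edges
    ([0, 1, 2], 3.0),                              # triangle
]
dgms = phat_diagrams(simplices, show_inf=True, verbose=False)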
def test_phat():
    import numpy as np
    import phat

    columns = [[], [], [], [], [], [], [], [], [], [], [0, 7], [5, 9],
               [0, 2], [4, 8], [7, 8], [2, 9], [0, 9], [16, 12, 15], [6, 8],
               [6, 7], [14, 18, 19], [1, 6], [1, 4], [4, 6], [23, 18, 13],
               [7, 9], [25, 16, 10], [0, 8], [27, 14, 10], [23, 21, 22],
               [6, 9], [30, 25, 19], [5, 6], [30, 32, 11], [3, 5], [3, 6],
               [35, 32, 34], [2, 8], [37, 27, 12], [1, 3], [39, 21, 35],
               [2, 4], [41, 37, 13]]
    dimensions = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1,
        1, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2
    ]
    bdry = phat.boundary_matrix(
        representation=phat.representations.vector_heap,
        columns=list(zip(dimensions, columns)))
    pairs = np.array(
        bdry.compute_persistence_pairs(
            reduction=phat.reductions.twist_reduction))
    assert np.all(pairs[:, 0] != 10), "First added edge should kill 7"
def rips_filtration(max_dim, max_scale, dist_mat):
    """
    Builds a boundary matrix for the Vietoris-Rips filtration up to
    dimension `max_dim`, together with the corresponding list of bigrades.

    Follows closely the "incremental algorithm" in the paper on fast
    Vietoris-Rips computation by Zomorodian, with some modifications to
    store boundary matrix and filtration info. That in turn is based on a
    version of the Bron-Kerbosch algorithm.

    Parameters
    ----------
    max_dim: int >= 0
        the highest dimension to compute
    max_scale: float
        the highest scale (distance) to consider
    dist_mat: 2D array
        an n x n distance matrix, which may be lower-triangular

    Returns
    -------
    pairs: list of tuples
        The barcodes up to dimension `max_dim` for the truncated
        Vietoris-Rips filtration, including only simplices whose scale of
        appearance is <= `max_scale`. Each element is a three-element
        tuple (birth, death, dimension) representing one interval in the
        barcode.
    """
    max_face_dim = max_dim + 1
    sorted_simplices = _rips_simplices(max_face_dim, max_scale, dist_mat)
    len_minus_one = len(sorted_simplices) - 1
    cobdy_matrix_pre = _create_coboundary_matrix(sorted_simplices,
                                                 max_face_dim)

    cobdy_matrix = phat.boundary_matrix(
        representation=phat.representations.bit_tree_pivot_column)
    cobdy_matrix.columns = cobdy_matrix_pre

    # call Bryn's PHAT wrapper for the persistence computation
    pairs = cobdy_matrix.compute_persistence_pairs()

    # Next, rescale the pairs to their original filtration values,
    # eliminating pairs with the same birth and death time. In keeping with
    # our chosen output format, we also add the dimension to the pair.
    scaled_pairs = []
    for i in range(len(pairs)):
        cobirth = sorted_simplices[len_minus_one - pairs[i][0]][1]
        codeath = sorted_simplices[len_minus_one - pairs[i][1]][1]
        if codeath < cobirth:
            dimension = len(
                sorted_simplices[len_minus_one - pairs[i][1]][0]) - 1
            scaled_pairs.append((codeath, cobirth, dimension))

    # Add in the intervals with endpoint inf. To do this, we first
    # construct an array paired_indices such that paired_indices[j] = 1 if
    # the j-th simplex (in the coboundary order) appears in a pair, and
    # paired_indices[j] = 0 otherwise.
    paired_indices = np.zeros(len(sorted_simplices))
    for i in range(len(pairs)):
        paired_indices[pairs[i][0]] = 1
        paired_indices[pairs[i][1]] = 1
    for i in range(len(paired_indices)):
        if paired_indices[i] == 0:
            birth = sorted_simplices[len_minus_one - i][1]
            dimension = len(sorted_simplices[len_minus_one - i][0]) - 1
            # we don't report the infinite bars in degree max_dim + 1
            if dimension < max_face_dim:
                scaled_pairs.append((birth, float("inf"), dimension))
    return scaled_pairs
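
# A hedged usage sketch for rips_filtration: barcodes of a 4-point metric
# space (a unit square, diagonals rounded to 1.4), up to H1 and scale 2.0.
# Assumes the _rips_simplices/_create_coboundary_matrix helpers used above
# are in scope; the distance values are illustrative.
import numpy as np

dist_mat = np.array([[0.0, 1.0, 1.4, 1.0],
                     [1.0, 0.0, 1.0, 1.4],
                     [1.4, 1.0, 0.0, 1.0],
                     [1.0, 1.4, 1.0, 0.0]])
bars = rips_filtration(max_dim=1, max_scale=2.0, dist_mat=dist_mat)
for birth, death, dim in bars:
    print("H%d: [%g, %g)" % (dim, birth, death))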
def build(self, **kwargs):
    '''
    Apply persistent homology using the combination of Cechmate and PHAT.

    On successful run, this function constructs:

    1. A list of dictionaries. Each dictionary corresponds to a
       birth/death feature containing keys:
           'pair', 'birth', 'death', 'H_i', 'generator_ptr'
       The ordering here has no significance outside of the internals of
       the cechmate/PHAT algorithms.
    2. A collection of useful statistics in the same order for
       visualization/access of top-level information. These are flat numpy
       arrays for easy slicing/access without having to work with
       dictionary/list logic. Available attributes:
           self.births
           self.deaths
           self.H_i
           self.generator_ptrs
    3. A collection of common orderings of the topological features based
       on the above information, generated using numpy.argsort.
       Available orderings:
           self._persistence_order (death - birth; sorted largest first)
           self._birth_order (sorted smallest first)
           self._death_order (sorted smallest first)

    Inputs: None; but you must instantiate the class with the data matrix
        first.
    Optional inputs:
        verbosity: controls the amount of print statements. Currently only
            two levels:
                0 : no print statements (default)
                1 : reports progress along the pipeline.
    Outputs: None; but the above attributes are stored in the object.
    '''
    verbosity = kwargs.get('verbosity', 0)

    # this part loosely follows https://cechmate.scikit-tda.org/notebooks/BasicUsage.html
    rips = cechmate.Rips(maxdim=self.maxdim)

    if verbosity > 0:
        print('Building complex...')
    compl = rips.build(self.X)

    # TODO: this is the second slowest part
    if verbosity > 0:
        print('Ordering simplices...')
    ordered_simplices = sorted(compl, key=lambda x: (x[1], len(x[0])))

    # cast as numpy array for handy array slicing later
    o_s2 = np.array(ordered_simplices)

    # TODO: This is the bottleneck right now in terms of speed!
    # It's written in Python; if there's a C++ version sitting around it
    # could be sped up. The Python code doesn't look too crazy...
    if verbosity > 0:
        print('Casting to sparse pivot column form...')
    columns = cechmate.solver._simplices_to_sparse_pivot_column(
        ordered_simplices)

    if verbosity > 0:
        print('Building boundary matrix...')
    b_m = phat.boundary_matrix(
        columns=columns,
        representation=phat.representations.sparse_pivot_column)

    if verbosity > 0:
        print('Computing persistence pairs...')
    pairs = b_m.compute_persistence_pairs()  # boundary matrix gets reduced in-place here

    # OK, here's the sketch of what we're doing to get out generators:
    #
    # 1. Use some criterion to identify which birth/death pairs you want
    #    (e.g. lexsort by homological dimension, then lifetime)
    # 2. Identify the "pairs" in the associated table
    #    (from b_m.compute_persistence_pairs)
    # 3. Associate "pairs" (boundary matrix columns) with the associated
    #    simplices by identifying nonzero entries (b_m.get_column(j));
    #    these are indexes into ordered_simplices
    # 4. Visualize/attach information however you like.
    #
    # REVISITED: pass on #1 until the visualization/processing stage;
    # otherwise loosely following 2-4, but no visualization in this
    # function.

    pp = np.array(list(pairs))  # cast to numpy array just to get array slicing

    # Find the homological dimension of each feature, following the lead of
    # cechmate.solver._process_distances().
    Hi = np.array([len(o_s2[ppi[0]][0]) - 1 for ppi in pp])

    # Pull out simplex information from the processed boundary matrix.
    if verbosity > 0:
        print('Identifying generators...')
    sparse_reduced_b_m = {}
    for j in range(b_m._matrix.get_num_cols()):
        thing = b_m._matrix.get_col(j)
        if len(thing) > 0:
            sparse_reduced_b_m[j] = np.array(thing, dtype=np.int64)

    if verbosity > 0:
        print('Putting a bow on everything...')
    topo_features = []
    for ii, pair in enumerate(pp):
        idx = pair[1]
        birth = o_s2[pair[0]][1]
        death = o_s2[pair[1]][1]

        # don't bother with trivial features.
        if birth == death:
            continue

        # vertex indices in original ordering
        gen_simplices = o_s2[sparse_reduced_b_m[idx]][:, 0]
        g_v = np.unique(np.concatenate(gen_simplices))

        topo_features.append({
            'pair': pair,
            'birth': birth,
            'death': death,
            'H_i': Hi[ii],
            'generator_ptr': g_v
        })

    # numpy array just for slicing
    self.topo_features = np.array(topo_features)

    # store summary information in flat form for easier sorting/accessing by ptr.
    self.births = np.zeros(len(self.topo_features), dtype=float)
    self.deaths = np.zeros(len(self.topo_features), dtype=float)
    self.H_i = np.zeros(len(self.topo_features), dtype=int)
    generator_ptrs = []
    for j, d in enumerate(self.topo_features):
        self.births[j] = d['birth']
        self.deaths[j] = d['death']
        self.H_i[j] = d['H_i']
        generator_ptrs.append(d['generator_ptr'])

    self.generator_ptrs = np.array(generator_ptrs)

    # create orderings for later use.
    self._persistence_order = np.argsort(self.births - self.deaths)  # largest lifetime first
    self._birth_order = np.argsort(self.births)  # smallest first
    self._death_order = np.argsort(self.deaths)  # smallest first

    if verbosity > 0:
        print('done.')
    return
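
# A hedged usage sketch for the attributes build() stores, assuming a
# class instance named `model` (hypothetical) that was constructed with a
# data matrix. Slicing the flat arrays by _persistence_order lists the
# most persistent features first.
model.build(verbosity=1)
order = model._persistence_order
for k in order[:5]:
    print("H%d feature: birth %.3f, death %.3f, %d generator vertices"
          % (model.H_i[k], model.births[k], model.deaths[k],
             len(model.generator_ptrs[k])))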
import numpy as np
import pandas as pd
import phat

from boundary_matrix import boundary_matrix

toy_data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
data = pd.DataFrame(data=toy_data,
                    index=['A', 'B', 'C', 'D'],
                    columns=['a', 'b', 'c'])

complex = boundary_matrix(data)
print("filtered complex: ")
print(complex)

# rebinding the name: from here on, boundary_matrix is the PHAT matrix
boundary_matrix = phat.boundary_matrix(
    representation=phat.representations.vector_vector, columns=complex)

persistence_pairs = boundary_matrix.compute_persistence_pairs()
persistence_pairs.sort()
print(persistence_pairs)
print("There are {} persistence pairs: ".format(len(persistence_pairs)))
for a, b in persistence_pairs:
    print("Birth: {}, Death: {}, lifespan: {}".format(a, b, b - a))
import phat

from simplex_check import simplex_check

boundary_matrix_square = phat.boundary_matrix(
    representation=phat.representations.vector_vector)
complex = [(0, []), (0, []), (0, []), (1, [0, 1]), (1, [0, 2]), (1, [1, 2]),
           (0, []), (1, [2, 3]), (1, [1, 3]), (2, [5, 7, 8]), (2, [3, 4, 5])]
boundary_matrix_square.columns = complex

square_pairs = boundary_matrix_square.compute_persistence_pairs()
square_pairs.sort()

print("\nThere are %d persistence pairs: " % len(square_pairs))
for pair in square_pairs:
    print("Birth: %d, Death: %d" % pair)

simplex_check(complex, 1)
print(complex)
print("""
 3
 |\\
 | \\
 |  \\
 |   \\ 4
5|    \\
 |     \\
 |  6   \\
 |       \\
 |________\\
 0    2    1
""")

import phat

# define a boundary matrix with the chosen internal representation
boundary_matrix = phat.boundary_matrix(
    representation=phat.representations.vector_vector)

# set the respective columns -- (dimension, boundary) pairs
boundary_matrix.columns = [(0, []), (0, []), (1, [0, 1]), (0, []),
                           (1, [1, 3]), (1, [0, 3]), (2, [2, 4, 5])]

# Or equivalently,
#   boundary_matrix = phat.boundary_matrix(representation=..., columns=...)
# would combine the creation of the matrix and the assignment of the columns.

# print some information of the boundary matrix:
print("\nThe boundary matrix has %d columns:" % len(boundary_matrix.columns))
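
# A natural continuation of this example (a minimal sketch using only calls
# shown elsewhere in this collection): reduce the matrix and print the
# resulting persistence pairs. The pair indices refer to the column order
# set above.
pairs = boundary_matrix.compute_persistence_pairs()
pairs.sort()
print("\nThere are %d persistence pairs: " % len(pairs))
for pair in pairs:
    print("Birth: %d, Death: %d" % pair)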
# this part loosely follows https://cechmate.scikit-tda.org/notebooks/BasicUsage.html
rips = cechmate.Rips(maxdim=1)
compl = rips.build(X)

# this is the second slowest part
ordered_simplices = sorted(compl, key=lambda x: (x[1], len(x[0])))

# cast as numpy array for handy array slicing later
o_s2 = np.array(ordered_simplices)

# This is the bottleneck right now in terms of speed!
# It's written in Python; if there's a C++ version sitting around it could
# be sped up. The Python code doesn't look too crazy...
columns = cechmate.solver._simplices_to_sparse_pivot_column(ordered_simplices)

b_m = phat.boundary_matrix(
    columns=columns,
    representation=phat.representations.sparse_pivot_column)
pairs = b_m.compute_persistence_pairs()  # boundary matrix gets reduced in-place here

dgms = cechmate.solver._process_distances(pairs, ordered_simplices)

# get largest non-infinite time/radius.
dgms_cat = np.concatenate(list(dgms.values()))
dgms_max = dgms_cat[np.logical_not(np.isinf(dgms_cat))].max()

# OK, here's the sketch of what we're doing to get out generators:
#
# 1. Use some criterion to identify which birth/death pairs you want
#    (e.g. lexsort by homological dimension, then lifetime)
# 2. Identify the "pairs" in the associated table
#    (from b_m.compute_persistence_pairs)
# 3. Associate "pairs" (boundary matrix columns) with the associated
#    simplices by identifying nonzero entries (b_m.get_column(j));
#    these are indexes into ordered_simplices
# 4. Visualize/attach information however you like.
def bit_tree_mat():
    return phat.boundary_matrix(phat.representations.bit_tree_pivot_column,
                                boundary_matrix)
from __future__ import print_function
import sys
import phat

if __name__ == '__main__':
    test_data = (sys.argv[1:] and sys.argv[1]) or "../../examples/torus.bin"
    print("Reading test data %s in binary format ..." % test_data)
    boundary_matrix = phat.boundary_matrix()
    # This is broken for some reason
    if not boundary_matrix.load(test_data):
        print("Error: test data %s not found!" % test_data)
        sys.exit(1)

    error = False

    def compute_chunked(mat):
        return mat.compute_persistence_pairs(phat.reductions.chunk_reduction)

    print("Comparing representations using Chunk algorithm ...")

    print("Running Chunk - Sparse ...")
    sparse_boundary_matrix = phat.boundary_matrix(
        phat.representations.sparse_pivot_column, boundary_matrix)
    sparse_pairs = compute_chunked(sparse_boundary_matrix)

    print("Running Chunk - Heap ...")
    heap_boundary_matrix = phat.boundary_matrix(
        phat.representations.vector_heap, boundary_matrix)
    heap_pairs = compute_chunked(heap_boundary_matrix)

    print("Running Chunk - Full ...")
    full_boundary_matrix = phat.boundary_matrix(
        phat.representations.full_pivot_column, boundary_matrix)
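
    # A plausible continuation of this comparison script (a sketch; the
    # exact error reporting of the original is an assumption): compute the
    # Full representation's pairs and check that all three representations
    # agree on the persistence pairs.
    full_pairs = compute_chunked(full_boundary_matrix)

    if sparse_pairs != heap_pairs:
        print("Error: sparse and heap differ!")
        error = True
    if heap_pairs != full_pairs:
        print("Error: heap and full differ!")
        error = True

    if error:
        sys.exit(1)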