Example #1
def persistence_diagram(boundary_matrix_csc, dimensions, filtration):
    """ Compute persistence diagram from a sparse matrix

    :param boundary_matrix_csc: Sparse matrix
    :param dimensions: Cell dimensions
    :param filtration: Filtration
    :returns: Persistence diagrams
    """
    sort_order = np.lexsort(
        (dimensions, filtration))  # Last key has higher sort priority
    boundary_matrix_csc = boundary_matrix_csc[sort_order, :][:, sort_order]
    dimensions = dimensions[sort_order]
    filtration = filtration[sort_order]

    col_count = boundary_matrix_csc.shape[1]
    assert len(dimensions) == col_count
    columns = [
        boundary_matrix_csc.getcol(col).indices.tolist()
        for col in range(col_count)
    ]
    # Several PHAT representations (vector_vector, full_pivot_column,
    # bit_tree_pivot_column) appear to give incorrect results here;
    # vector_heap works.
    bdry = phat.boundary_matrix(
        representation=phat.representations.vector_heap,
        columns=list(zip(dimensions, columns)))
    pairs = bdry.compute_persistence_pairs(
        reduction=phat.reductions.twist_reduction)
    dgms = pairs_to_diagram(pairs, dimensions, filtration)
    return dgms
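A minimal usage sketch for the function above (an addition, not part of the original snippet; it assumes numpy, scipy, and phat are importable and that `pairs_to_diagram` from the same module is in scope): two vertices joined by one edge.

import numpy as np
from scipy.sparse import csc_matrix

# cells 0 and 1 are vertices; cell 2 is the edge [0, 1]
rows, cols = np.array([0, 1]), np.array([2, 2])
bm = csc_matrix((np.ones(2), (rows, cols)), shape=(3, 3))
dims = np.array([0, 0, 1])
filt = np.array([0.0, 0.5, 1.0])

dgms = persistence_diagram(bm, dims, filt)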
Example #2
    def _compute_persistence_pairs(self, boundary_matrix=None):
        # test for None explicitly: `or` would also discard an explicitly passed empty matrix
        boundary_matrix = (boundary_matrix if boundary_matrix is not None
                           else self._boundary_matrix)

        self._reduced_boundary_matrix = phat.boundary_matrix(
            columns=boundary_matrix,
            representation=phat.representations.sparse_pivot_column,
        )

        pairs = self._reduced_boundary_matrix.compute_persistence_pairs()
        pairs.sort()
        self._pairs = list(pairs)
        return self._pairs
Example #3
def phat_diagrams(simplices, show_inf=False, verbose=True):
    """
    Do a custom filtration wrapping around phat

    Parameters
    -----------
    simplices: A list of lists of simplices and their distances
        the kth element is itself a list of tuples ([idx1, ..., idxk], dist)
        where [idx1, ..., idxk] is a list of vertices involved in the simplex
        and "dist" is the distance at which the simplex is added

    show_inf: Boolean
        Whether or not to return points that never die

    Returns
    --------
    dgms: A dictionary of persistence diagrams, where dgms[k] is 
          the persistence diagram for Hk 
    """

    ## Convert simplices representation to sparse pivot column
    #  -- sort by birth time, if tie, use order of simplex
    ordered_simplices = sorted(simplices, key=lambda x: (x[1], len(x[0])))
    columns = _simplices_to_sparse_pivot_column(ordered_simplices, verbose)

    ## Setup boundary matrix and reduce
    if verbose:
        print("Computing persistence pairs...")
        tic = time.time()

    boundary_matrix = phat.boundary_matrix(
        columns=columns, representation=phat.representations.sparse_pivot_column
    )
    pairs = boundary_matrix.compute_persistence_pairs()
    pairs.sort()

    if verbose:
        print(
            "Finished computing persistence pairs (Elapsed Time %.3g)"
            % (time.time() - tic)
        )

    ## Setup persistence diagrams by reading off distances
    dgms = _process_distances(pairs, ordered_simplices)

    ## Add all unpaired simplices as infinite points
    if show_inf:
        dgms = _add_unpaired(dgms, pairs, simplices)

    ## Convert to arrays:
    dgms = [np.array(dgm) for dgm in dgms.values()]

    return dgms
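A hedged usage sketch for phat_diagrams (an addition, not from the source; it assumes the private helpers `_simplices_to_sparse_pivot_column`, `_process_distances`, and `_add_unpaired` are in scope, as in cechmate): a filled triangle.

simplices = [
    ([0], 0.0), ([1], 0.0), ([2], 0.0),           # vertices
    ([0, 1], 1.0), ([1, 2], 1.0), ([0, 2], 1.5),  # edges
    ([0, 1, 2], 2.0),                             # the 2-simplex
]
dgms = phat_diagrams(simplices, show_inf=True, verbose=False)
for k, dgm in enumerate(dgms):
    print("H%d:" % k, dgm)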
Example #4
def test_phat():
    import numpy as np
    import phat
    columns = [[], [], [], [], [], [], [], [], [], [], [0, 7], [5, 9], [0, 2],
               [4, 8], [7, 8], [2, 9], [0, 9], [16, 12, 15], [6, 8], [6, 7],
               [14, 18, 19], [1, 6], [1, 4], [4, 6], [23, 18, 13], [7, 9],
               [25, 16, 10], [0, 8], [27, 14, 10], [23, 21, 22], [6, 9],
               [30, 25, 19], [5, 6], [30, 32, 11],
               [3, 5], [3, 6], [35, 32, 34], [2, 8], [37, 27, 12], [1, 3],
               [39, 21, 35], [2, 4], [41, 37, 13]]
    dimensions = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1,
        2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 1, 1, 2, 1, 2, 1, 2, 1, 2
    ]
    bdry = phat.boundary_matrix(
        representation=phat.representations.vector_heap,
        columns=list(zip(dimensions, columns)))
    pairs = np.array(
        bdry.compute_persistence_pairs(
            reduction=phat.reductions.twist_reduction))
    assert np.all(pairs[:, 0] != 10), "First added edge should kill 7"
Example #5
def rips_filtration(max_dim, max_scale, dist_mat):
    """
    Builds a boundary matrix for the boundary-Rips filtration up to dimension
     `max_dim`.

    Also builds the corresponding list of bigrades follows closely
    the "incremental algorithm" in the paper on fast Vietoris-Rips comptuation
    by Zomorodian, with some modification to store boundary matrix and
    filtration info. That in turn is based on a version of Bron-Kerbosch
    algorithm.

    Parameters
    ----------

    max_dim: int >= 0
        the highest dimension to compute
    max_scale: float
        the highest scale (distance) to consider
    dist_mat: 2D array
        an n x n distance matrix, which may be lower-triangular.

    Returns
    -------

    pairs: list of triples
        The barcodes up to dimension max_dim for the truncated Vietoris-Rips
        filtration, including only simplices whose scale of appearance
        is <= max_scale. Each three-element tuple represents one interval
        in a barcode and has the form (birth, death, dimension).
    """
    max_face_dim = max_dim+1
    sorted_simplices = _rips_simplices(max_face_dim, max_scale, dist_mat)
    len_minus_one = len(sorted_simplices) - 1
    cobdy_matrix_pre = _create_coboundary_matrix(sorted_simplices,
                                                 max_face_dim)
    # print(cobdy_matrix_pre)

    # print(sorted_simplices)
    # print(bdy_matrix_pre)

    cobdy_matrix = phat.boundary_matrix(
        representation=phat.representations.bit_tree_pivot_column)
    cobdy_matrix.columns = cobdy_matrix_pre

    # call Bryn's PHAT wrapper for the persistence computation
    pairs = cobdy_matrix.compute_persistence_pairs()

    # next, rescale the pairs to their original filtration values, eliminating
    # pairs with the same birth and death time. In keeping with our chosen
    # output format, we also add the dimension to the pair.
    scaled_pairs = []
    for i in range(len(pairs)):
        cobirth = sorted_simplices[len_minus_one - pairs[i][0]][1]
        codeath = sorted_simplices[len_minus_one - pairs[i][1]][1]
        if codeath < cobirth:
            dimension = len(
                sorted_simplices[len_minus_one - pairs[i][1]][0]) - 1
            scaled_pairs.append((codeath, cobirth, dimension))

    # add in the intervals with endpoint inf
    # To do this, we first construct an array paired_indices such that
    # if the j^th simplex (in the coboundary order) appears in a pair,
    # paired_indices[j] = 1; otherwise paired_indices[j] = 0.

    paired_indices = np.zeros(len(sorted_simplices))
    for i in range(len(pairs)):
        paired_indices[pairs[i][0]] = 1
        paired_indices[pairs[i][1]] = 1
    for i in range(len(paired_indices)):
        if paired_indices[i] == 0:
            birth = sorted_simplices[len_minus_one - i][1]
            dimension = len(sorted_simplices[len_minus_one - i][0]) - 1
            # we don't report the infinite bars in degree max_dim+1
            if dimension < max_face_dim:
                scaled_pairs.append((birth, float("inf"), dimension))
    return scaled_pairs
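A small usage sketch (an addition; it assumes the module helpers `_rips_simplices` and `_create_coboundary_matrix` are in scope): four points at the corners of a unit square.

import numpy as np

# pairwise distances between the four corners of a unit square
s = np.sqrt(2)
dist_mat = np.array([[0.0, 1.0, s, 1.0],
                     [1.0, 0.0, 1.0, s],
                     [s, 1.0, 0.0, 1.0],
                     [1.0, s, 1.0, 0.0]])
bars = rips_filtration(max_dim=1, max_scale=2.0, dist_mat=dist_mat)
for birth, death, dim in bars:
    print("H%d: [%g, %s)" % (dim, birth, death))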
Example #6
    def build(self, **kwargs):
        '''
        Apply persistent homology using the combination of Cechmate and PHAT.

        On successful run, this function constructs:
            1. A list of dictionaries. Each dictionary corresponds to a
                birth/death feature and contains the keys:
                    'pair', 'birth', 'death', 'H_i', 'generator_ptr'
                The ordering here has no significance outside of the internals 
                of the cechmate/PHAT algorithms.

            2. A collection of useful statistics in the same order for 
                visualization/access of top-level information. These are 
                flat numpy arrays for easy slicing/access without having to work 
                with dictionary/list logic. Available attributes:
                    self.births
                    self.deaths
                    self.H_i
                    self.generator_ptrs

            3. A collection of common orderings of the topological features
                based on the above information, generated using numpy.argsort.
                Available orderings:
                    self._persistence_order (death-birth; sorted from largest first)
                    self._birth_order       (sorted from smallest first)
                    self._death_order       (sorted from smallest first)

        Inputs: None; but you must instantiate the class with the data matrix first.

        Optional inputs:
            verbosity: controls the amount of print statements. Two levels are currently supported:
                0 : no print statements (default)
                1 : reports progress along pipeline.

        Outputs: None; but the above attributes are stored in the object.
        '''

        verbosity = kwargs.get('verbosity', 0)

        # this part loosely follows https://cechmate.scikit-tda.org/notebooks/BasicUsage.html
        rips = cechmate.Rips(maxdim=self.maxdim)

        if verbosity > 0: print('Building complex...')
        compl = rips.build(self.X)  # TODO: this is the second slowest part

        if verbosity > 0: print('ordering simplices...')
        ordered_simplices = sorted(compl, key=lambda x: (x[1], len(x[0])))

        # cast as numpy array for handy array slicing later
        o_s2 = np.array(ordered_simplices)

        #
        # TODO: This is the bottleneck right now in terms of speed!
        # It's written in python; if there's a C++ version sitting around it could
        # be sped up. The python code doesn't look too crazy...
        if verbosity > 0: print('Casting to sparse pivot column form...')
        columns = cechmate.solver._simplices_to_sparse_pivot_column(
            ordered_simplices)

        if verbosity > 0: print('Building boundary matrix...')
        b_m = phat.boundary_matrix(
            columns=columns,
            representation=phat.representations.sparse_pivot_column)

        if verbosity > 0: print('Computing persistence pairs...')
        pairs = b_m.compute_persistence_pairs(
        )  # boundary matrix gets reduced in-place here

        #
        # OK, here's the sketch of what we're doing
        # to get out generators:
        #
        # 1. Use some criterion to identify which birth/death pairs you want
        #    (e.g. lexsort by homological dimension, then lifetime)
        # 2. Identify the "pairs" in the associated table (from b_m.compute_persistence_pairs)
        # 3. Associate "pairs" (boundary matrix columns) with associated simplices
        #    by identifying nonzero entries (b_m.get_column(j)); these are indexes
        #    into ordered_simplices
        # 4. Visualize/attach information however you like.
        #
        # REVISITED: pass on #1 until visualization/processing stage.
        # otherwise loosely following 2-4, but no visualization in this function.
        #

        pp = np.array(
            list(pairs))  # cast to numpy array just to get array slicing

        # Find homological dimension of each thing; following the lead of
        # cechmate.solver._process_distances().
        Hi = np.array([len(o_s2[ppi[0]][0]) - 1 for ppi in pp])

        # Pull out simplex information from the processed boundary matrix.
        if verbosity > 0: print('Identifying generators...')
        sparse_reduced_b_m = {}

        for j in range(b_m._matrix.get_num_cols()):
            thing = b_m._matrix.get_col(j)
            if len(thing) > 0:
                sparse_reduced_b_m[j] = np.array(thing, dtype=np.int64)
        #

        if verbosity > 0: print('Putting a bow on everything...')
        topo_features = []

        for ii, pair in enumerate(pp):
            idx = pair[1]
            birth = o_s2[pair[0]][1]
            death = o_s2[pair[1]][1]

            # don't bother with trivial features.
            if birth == death:
                continue
            #
            gen_simplices = o_s2[sparse_reduced_b_m[idx]][:, 0]

            # vertex indices in original ordering
            g_v = np.unique(np.concatenate(gen_simplices))

            topo_features.append({
                'pair': pair,
                'birth': birth,
                'death': death,
                'H_i': Hi[ii],
                'generator_ptr': g_v
            })
        #

        # numpy array just for slicing
        self.topo_features = np.array(topo_features)

        # store summary information in flat form for easier sorting/accessing by ptr.
        self.births = np.zeros(len(self.topo_features), dtype=float)
        self.deaths = np.zeros(len(self.topo_features), dtype=float)
        self.H_i = np.zeros(len(self.topo_features), dtype=int)

        generator_ptrs = []

        for j, d in enumerate(self.topo_features):
            self.births[j] = d['birth']
            self.deaths[j] = d['death']
            self.H_i[j] = d['H_i']
            generator_ptrs.append(d['generator_ptr'])
        #
        self.generator_ptrs = np.array(generator_ptrs)

        # create orderings for later use.
        self._persistence_order = np.argsort(self.births -
                                             self.deaths)  # largest first
        self._birth_order = np.argsort(self.births)  # smallest first
        self._death_order = np.argsort(self.deaths)  # smallest first

        if verbosity > 0: print('done.')
        return
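A hypothetical usage sketch for the method above; the enclosing class is not shown in this excerpt, so the name `RipsGenerators` and its constructor (which would set self.X and self.maxdim) are assumptions based on how build() uses them.

import numpy as np

X = np.random.default_rng(0).normal(size=(40, 2))  # toy point cloud
ph = RipsGenerators(X, maxdim=1)  # hypothetical constructor
ph.build(verbosity=1)

top = ph._persistence_order[0]  # most persistent feature first
print(ph.births[top], ph.deaths[top], ph.H_i[top])
print(ph.topo_features[top]['generator_ptr'])  # vertex indices of its generator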
Example #7
import numpy as np
import pandas as pd
import phat
from boundary_matrix import boundary_matrix

toy_data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
data = pd.DataFrame(data=toy_data,
                    index=['A', 'B', 'C', 'D'],
                    columns=['a', 'b', 'c'])

filtered_complex = boundary_matrix(data)  # renamed to avoid shadowing the builtin `complex`
print("filtered complex: ")
print(filtered_complex)

bm = phat.boundary_matrix(  # renamed to avoid rebinding the imported `boundary_matrix`
    representation=phat.representations.vector_vector, columns=filtered_complex)

persistence_pairs = bm.compute_persistence_pairs()
persistence_pairs.sort()

print(persistence_pairs)

print("There are {} persistence pairs: ".format(len(persistence_pairs)))
for a, b in persistence_pairs:
    print("Birth: {}, Death: {}, lifespan: {}".format(a, b, b - a))
Example #8
import phat
from simplex_check import simplex_check

boundary_matrix_square = phat.boundary_matrix(
    representation=phat.representations.vector_vector)

# `cells` rather than `complex`, to avoid shadowing the Python builtin
cells = [(0, []), (0, []), (0, []), (1, [0, 1]), (1, [0, 2]), (1, [1, 2]),
         (0, []), (1, [2, 3]), (1, [1, 3]), (2, [5, 7, 8]), (2, [3, 4, 5])]

boundary_matrix_square.columns = cells

square_pairs = boundary_matrix_square.compute_persistence_pairs()

square_pairs.sort()

print("\nThere are %d persistence pairs: " % len(square_pairs))
for pair in square_pairs:
    print("Birth: %d, Death: %d" % pair)

simplex_check(cells, 1)

print(cells)
Example #9
if __name__ == '__main__':
    print("""
we will build an ordered boundary matrix of this simplicial complex consisting of a single triangle:

     3
     |\\
     | \\
     |  \\
     |   \\ 4
    5|    \\
     |     \\
     |  6   \\
     |       \\
     |________\\
     0    2    1

""")

    import phat

    # define a boundary matrix with the chosen internal representation
    boundary_matrix = phat.boundary_matrix(representation = phat.representations.vector_vector)

    # set the respective columns -- (dimension, boundary) pairs
    boundary_matrix.columns = [ (0, []),
                                (0, []),
                                (1, [0,1]),
                                (0, []),
                                (1, [1,3]),
                                (1, [0,3]),
                                (2, [2,4,5])]

    # or equivalently, boundary_matrix = phat.boundary_matrix(representation = ..., columns = ...)
    # would combine the creation of the matrix and the assignment of the columns

    # print some information about the boundary matrix:
    print("\nThe boundary matrix has %d columns:" % len(boundary_matrix.columns))
Example #10
# this part loosely follows https://cechmate.scikit-tda.org/notebooks/BasicUsage.html
rips = cechmate.Rips(maxdim=1)
compl = rips.build(X)   # this is the second slowest part
ordered_simplices = sorted(compl, key=lambda x: (x[1], len(x[0])))

# cast as numpy array for handy array slicing later
o_s2 = np.array(ordered_simplices)

#
# This is the bottleneck right now in terms of speed!
# It's written in python; if there's a C++ version sitting around it could
# be sped up. The python code doesn't look too crazy...
columns = cechmate.solver._simplices_to_sparse_pivot_column(ordered_simplices)

b_m = phat.boundary_matrix(columns=columns, representation=phat.representations.sparse_pivot_column)
pairs = b_m.compute_persistence_pairs() # boundary matrix gets reduced in-place here

dgms = cechmate.solver._process_distances(pairs, ordered_simplices)

# get largest non-infinite time/radius.
dgms_cat = np.concatenate(list(dgms.values()))
dgms_max = dgms_cat[np.logical_not(np.isinf(dgms_cat))].max()


#
# OK, here's the sketch of what we're doing
# to get out generators:
#
# 1. Use some criterion to identify which birth/death pairs you want
#    (e.g. lexsort by homological dimension, then lifetime)
# 2. Identify the "pairs" in the associated table (from b_m.compute_persistence_pairs)
# 3. Associate "pairs" (boundary matrix columns) with associated simplices
#    by identifying nonzero entries (b_m.get_column(j)); these are
#    indexes into ordered_simplices
# 4. Visualize/attach information however you like.
Example #11
File: self_test.py Project: xoltar/phat
 def bit_tree_mat():
     return phat.boundary_matrix(phat.representations.bit_tree_pivot_column, boundary_matrix)
Example #12
File: self_test.py Project: xoltar/phat
from __future__ import print_function
import sys
import phat

if __name__=='__main__':
    test_data = (sys.argv[1:] and sys.argv[1]) or "../../examples/torus.bin"

    print("Reading test data %s in binary format ..." % test_data)

    boundary_matrix = phat.boundary_matrix()
    # This is broken for some reason
    if not boundary_matrix.load(test_data):
        print("Error: test data %s not found!" % test_data)
        sys.exit(1)

    error = False

    def compute_chunked(mat):
        return mat.compute_persistence_pairs(phat.reductions.chunk_reduction)

    print("Comparing representations using Chunk algorithm ...")
    print("Running Chunk - Sparse ...")
    sparse_boundary_matrix = phat.boundary_matrix(phat.representations.sparse_pivot_column, boundary_matrix)
    sparse_pairs = compute_chunked(sparse_boundary_matrix)

    print("Running Chunk - Heap ...")
    heap_boundary_matrix = phat.boundary_matrix(phat.representations.vector_heap, boundary_matrix)
    heap_pairs = compute_chunked(heap_boundary_matrix)

    print("Running Chunk - Full ...")
    full_boundary_matrix = phat.boundary_matrix(phat.representations.full_pivot_column, boundary_matrix)
Example #13
 def bit_tree_mat():
     return phat.boundary_matrix(phat.representations.bit_tree_pivot_column,
                                 boundary_matrix)
Example #14
from __future__ import print_function
import sys
import phat

if __name__ == '__main__':
    test_data = (sys.argv[1:] and sys.argv[1]) or "../../examples/torus.bin"

    print("Reading test data %s in binary format ..." % test_data)

    boundary_matrix = phat.boundary_matrix()
    # This is broken for some reason
    if not boundary_matrix.load(test_data):
        print("Error: test data %s not found!" % test_data)
        sys.exit(1)

    error = False

    def compute_chunked(mat):
        return mat.compute_persistence_pairs(phat.reductions.chunk_reduction)

    print("Comparing representations using Chunk algorithm ...")
    print("Running Chunk - Sparse ...")
    sparse_boundary_matrix = phat.boundary_matrix(
        phat.representations.sparse_pivot_column, boundary_matrix)
    sparse_pairs = compute_chunked(sparse_boundary_matrix)

    print("Running Chunk - Heap ...")
    heap_boundary_matrix = phat.boundary_matrix(
        phat.representations.vector_heap, boundary_matrix)
    heap_pairs = compute_chunked(heap_boundary_matrix)
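    # A plausible continuation of the self-test (an assumption, not copied
    # from self_test.py): pair lists computed with different representations
    # should be identical.
    if sparse_pairs == heap_pairs:
        print("Chunk - Sparse and Chunk - Heap agree.")
    else:
        print("Error: Chunk - Sparse and Chunk - Heap disagree!")
        error = True
    sys.exit(1 if error else 0)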