Example #1
def mask_data(data, mask, labels=None):
    # Restrict the data to the points selected by the boolean mask.  1-dim
    # data is treated as a condensed distance matrix, 2-dim data as a point
    # cloud with one row per point.
    if mask is None or np.all(mask):
        # Nothing is masked out: return the data unchanged (no new labels).
        return data, None
    else:
        if labels is None:
            newlabels = np.flatnonzero(mask)
        else:
            newlabels = labels[mask]
        if data.ndim == 1:
            return compressed_submatrix(data, np.flatnonzero(mask)), newlabels
        else:
            return data[mask], newlabels
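A minimal usage sketch (illustration only, not part of the module): mask_data applied to a small 2-dim point cloud, assuming numpy is imported as np as in the surrounding code.

import numpy as np

data = np.array([[0., 0.], [1., 1.], [2., 2.]])
mask = np.array([True, False, True])
sub, labels = mask_data(data, mask)
# sub is rows 0 and 2 of data; labels == array([0, 2]) holds the original
# indices of the retained points.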
Example #3
def Mapper_step(q, pcd, N, point_labels, filt, cover, cluster, cutoff, M,
                metricpar, verbose):
    if verbose:
        print('Start Mapper thread.')
    while True:
        level = q.get()
        if level is None: # Sentinel: end the thread
            break

        # Select the points in this filter range
        idx = cover.data_index(level)
        num_points = idx.size

        # Handle special cases.
        # 0 points in the filter interval: nothing to cluster.
        if num_points == 0:
            if verbose:
                print('Warning! Filter level {0} is empty.'
                      .format(level.index))
            num_clust = 0
            Z = None
            R = None
        # 1 point => 1 cluster
        elif num_points == 1:
            if verbose:
                print('Warning! Filter level {0} has only one point.'
                      .format(level.index))
            num_clust = 1
            points_clusters = np.zeros(1, dtype=int)
            # We label clusters starting with 0.
            Z = np.empty((0, 4))
            R = 0.
        # 2 or more points: general case
        else:
            if verbose:
                print('Filter level {0} has {1} points.'
                      .format(level.index, num_points))
            if pcd.ndim == 1:
                # 1-dim pcd is a condensed distance matrix: take the submatrix.
                part_data = compressed_submatrix(pcd, idx)
            else:
                # 2-dim pcd is a point cloud: compute the pairwise distances.
                part_data = pdist(pcd[idx, :], **metricpar)
            # diameter of the filter slice
            R = part_data.max()
            Z = cluster(part_data)
            if Z[-1, 2] > R:
                print('Warning: last clustering distance is bigger than the '
                      'diameter of the filter slice ({0}>{1}).'
                      .format(Z[-1, 2], R))
                R = Z[-1, 2]

            if cutoff:
                # heights in the clustering tree
                heights = Z[:, 2]
                # determine a cutoff value
                # To do: Improve this!
                num_clust = cutoff(heights, R)
                # actual clustering, after the cutoff value has been determined
                points_clusters = fcluster(Z, num_clust)
                # My fcluster starts labelling clusters at 0!
                #assert num_clust == points_clusters.max()
                assert np.all(np.unique(points_clusters) ==
                              np.arange(num_clust))

        if cutoff:
            #
            # Determine the nodes of the output graph
            #
            # Each cluster in the partial clustering gives a node
            for cl in range(num_clust):
                # Indices of the cluster's points in the distance matrix.
                points = idx[points_clusters == cl]
                # Color the node by the median of the first filter component.
                attribute = np.median(filt[points, 0])
                # Normally, the data points are labeled 0, 1, ...
                # Allow relabeling of the data points, whatever that is good
                # for.  To do: ask Aravind.
                if point_labels is not None:
                    points = point_labels[points]
                M.add_node(level.index, points, attribute)
        else:
            # save data for the scale graph algorithm
            M.scale_graph_data.append(dataidx=idx,
                                      dendrogram=Z,
                                      diameter=R,
                                      levelindex=level.index)
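For context, Mapper_step expects `cutoff` to be a callable that takes the vector of merge heights from the dendrogram Z and the diameter R of the filter slice and returns the number of clusters (the subsequent fcluster call uses this module's own variant, which labels clusters from 0). The helper below is only an illustrative sketch of such a callable with a fixed-fraction threshold; its name and heuristic are invented here, not taken from this module.

import numpy as np

def fraction_cutoff(heights, diameter, fraction=0.5):
    # Hypothetical cutoff: count the merges whose height exceeds
    # fraction * diameter; cutting the dendrogram there leaves those merges
    # undone, i.e. count + 1 clusters.
    return int(np.count_nonzero(np.asarray(heights) > fraction * diameter)) + 1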
Example #4

def crop(f, a, b):
    '''Boolean mask for the values of f between the a-th and the (100-b)-th
    percentile, i.e. with (approximately) the lowest a% and the highest b%
    of the values removed.'''
    from scipy.stats import scoreatpercentile
    s1 = scoreatpercentile(f, a)
    s2 = scoreatpercentile(f, 100 - b)
    assert s1 <= s2
    return np.logical_and(f >= s1, f <= s2)
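A small usage sketch (illustration only): crop(f, 5, 5) keeps the values of f between the 5th and the 95th percentile, so the call below masks off roughly the lowest and highest 5% of the filter values.

import numpy as np

f = np.random.rand(1000)   # some filter values
keep = crop(f, 5, 5)       # boolean mask for the central ~90%
f_cropped = f[keep]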

if __name__ == '__main__':
    '''Test equivalence of the Python and the C++ implementations.'''
    import cmappertools
    import numpy as np

    for i in range(10000):
        # Random problem size and a random, duplicate-free index subset.
        N = np.random.randint(1, 1001)
        n = np.random.randint(1, N + 1)
        dm = np.random.rand(N * (N - 1) // 2)
        idx = np.unique(np.random.randint(N, size=n))
        r = compressed_submatrix(dm, idx)
        s = cmappertools.compressed_submatrix(dm, idx)
        if np.any(r != s):
            raise AssertionError
        print("Iteration {0}: OK.".format(i))
else:
    '''Load the C++ routines, if available.'''
    try:
        from cmappertools import compressed_submatrix
    except ImportError:
        sys.stderr.write("The 'cmappertools' module could not be imported.\n")
del sys
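For reference, the behaviour that the equivalence test above checks can be reproduced, slowly, with scipy's squareform: expand the condensed distance matrix, take the rows and columns in idx, and re-condense. This is only a sketch of the expected result (quadratic in memory), not the module's implementation; the function name below is invented for illustration.

import numpy as np
from scipy.spatial.distance import squareform

def compressed_submatrix_reference(dm, idx):
    # dm:  condensed (upper-triangular) distance matrix for N points
    # idx: sorted, unique indices of the points to keep
    D = squareform(dm, checks=False)                       # full N x N matrix
    return squareform(D[np.ix_(idx, idx)], checks=False)   # back to condensed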