def mask_data(data, mask, labels=None):
    if mask is None or np.all(mask):
        # Nothing is masked out: return the data unchanged, no relabeling.
        return data, None
    else:
        if labels is None:
            newlabels = np.flatnonzero(mask)
        else:
            newlabels = labels[mask]
        if data.ndim == 1:
            # 1-dim data: a compressed (condensed) pairwise distance matrix.
            return compressed_submatrix(data, np.flatnonzero(mask)), newlabels
        else:
            # 2-dim data: vector data, select the masked rows.
            return data[mask], newlabels
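# Illustrative sketch (an addition, not original code): mask_data accepts either
# vector data (2-dim array, one row per point) or a condensed pairwise distance
# matrix (1-dim array in scipy.spatial.distance.pdist order).  The doctest-style
# example below is an assumption about typical usage; it is kept in a comment so
# that importing this module stays side-effect free.
#
#     >>> import numpy as np
#     >>> data = np.arange(10.).reshape(5, 2)            # 5 points in R^2
#     >>> mask = np.array([True, False, True, True, False])
#     >>> sub, labels = mask_data(data, mask)
#     >>> sub.shape
#     (3, 2)
#     >>> labels                                          # surviving row indices
#     array([0, 2, 3])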
def Mapper_step(q, pcd, N, point_labels, filt, cover, cluster, cutoff, M,
                metricpar, verbose):
    if verbose:
        print('Start Mapper thread.')
    while True:
        level = q.get()
        if level is None:  # Sentinel: end the thread
            break
        # Select the points in this filter range
        idx = cover.data_index(level)
        num_points = idx.size
        # Handle special cases.
        # 0 points in the filter interval: just skip the loop iteration.
        if num_points == 0:
            if verbose:
                print('Warning! Filter level {0} is empty.'.
                      format(level.index))
            num_clust = 0
            Z = None
            R = None
        # 1 point => 1 cluster
        elif num_points == 1:
            if verbose:
                print('Warning! Filter level {0} has only one point.'.
                      format(level.index))
            num_clust = 1
            points_clusters = np.zeros(1, dtype=int)
            # We label clusters starting with 0.
            Z = np.empty((0, 4))
            R = 0.
        # 2 or more points: general case
        else:
            if verbose:
                print('Filter level {0} has {1} points.'.
                      format(level.index, num_points))
            if pcd.ndim == 1:
                part_data = compressed_submatrix(pcd, idx)
            else:
                part_data = pdist(pcd[idx, :], **metricpar)
            # diameter
            R = part_data.max()
            Z = cluster(part_data)
            if Z[-1, 2] > R:
                print('Warning: last clustering distance is bigger than the '
                      'diameter of the filter slice ({0}>{1}).'.
                      format(Z[-1, 2], R))
                R = Z[-1, 2]
            if cutoff:
                # heights in the clustering tree
                heights = Z[:, 2]
                # determine a cutoff value
                # To do: Improve this!
                num_clust = cutoff(heights, R)
                # actual clustering, after the cutoff value has been determined
                points_clusters = fcluster(Z, num_clust)
                # My fcluster starts labelling clusters at 0!
                #assert num_clust == points_clusters.max()
                assert np.all(np.unique(points_clusters) ==
                              np.arange(num_clust))
        if cutoff:
            #
            # Determine the nodes of the output graph
            #
            # Each cluster in the partial clustering gives a node
            for cl in range(num_clust):
                # This gives us the indices of the cluster's points in the
                # d matrix.
                points = idx[points_clusters == cl]
                # The color is determined by the first filter component!
                # Color the nodes by their median filter value.
                attribute = np.median(filt[points, 0])
                # Normally, the data points are labeled 0,1,...
                # Allow relabeling of the data points, whatever this is good
                # for.
                # To do: ask Aravind.
                if point_labels is not None:
                    points = point_labels[points]
                M.add_node(level.index, points, attribute)
        else:
            # save data for the scale graph algorithm
            M.scale_graph_data.append(dataidx=idx,
                                      dendrogram=Z,
                                      diameter=R,
                                      levelindex=level.index)
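# Hedged sketch (an addition, not original code): Mapper_step is written as a
# queue worker that stops on a None sentinel, so a driver presumably enqueues one
# item per cover level and one sentinel per worker.  The names `num_threads` and
# the assumption that `cover` can be iterated over its levels are illustrative
# only; the actual driver in this package may differ.
#
#     >>> import queue, threading                              # doctest: +SKIP
#     >>> q = queue.Queue()
#     >>> workers = [threading.Thread(
#     ...                target=Mapper_step,
#     ...                args=(q, pcd, N, point_labels, filt, cover, cluster,
#     ...                      cutoff, M, metricpar, verbose))
#     ...            for _ in range(num_threads)]
#     >>> for w in workers: w.start()
#     >>> for level in cover: q.put(level)       # one work item per level
#     >>> for _ in workers: q.put(None)          # one sentinel per worker
#     >>> for w in workers: w.join()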
def crop(f, a, b):
    from scipy.stats import scoreatpercentile
    s1 = scoreatpercentile(f, a)
    s2 = scoreatpercentile(f, 100 - b)
    assert s1 <= s2
    return np.logical_and(f >= s1, f <= s2)

if __name__ == '__main__':
    '''Test equivalence of the Python and the C++ implementation'''
    import cmappertools
    import numpy as np

    for i in range(10000):
        N = np.random.random_integers(1000)
        n = np.random.random_integers(N)
        dm = np.random.rand(N * (N - 1) // 2)
        idx = np.unique(np.random.randint(N, size=n))

        r = compressed_submatrix(dm, idx)
        s = cmappertools.compressed_submatrix(dm, idx)

        if np.any(r != s):
            raise AssertionError
        print("Iteration {0}: OK.".format(i))
else:
    '''Load the C++ routines, if available.'''
    try:
        from cmappertools import compressed_submatrix
    except ImportError:
        sys.stderr.write("The 'cmappertools' module could not be imported.\n")
    del sys
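# Illustrative sketch (an addition, not original code): crop, defined above,
# returns a boolean mask that keeps the filter values between the a-th and the
# (100-b)-th percentile, i.e. it discards roughly the lowest a percent and the
# highest b percent.  The resulting mask can then be passed to mask_data.  The
# numbers below are assumptions chosen for illustration.
#
#     >>> import numpy as np
#     >>> f = np.arange(101.)               # filter values 0, 1, ..., 100
#     >>> mask = crop(f, 10, 10)            # drop the extreme 10% on each side
#     >>> f[mask].min(), f[mask].max()
#     (10.0, 90.0)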