def get_labels(self, shape=None):
    '''Return label matrices in which no two labels share a pixel

    The IJV representation allows one pixel to carry several labels, which
    a single labels matrix cannot express. The labels are therefore split
    into groups ("colors") so that labels within a group never overlap, and
    one matrix is produced per group.

    shape - shape of the matrices to produce. If None, the stored shape is
            used; failing that, the parent image's shape; failing that, a
            shape derived from the largest coordinates present.

    returns a list of (labels matrix, label numbers in that matrix) tuples
    '''
    if self.__segmented is not None:
        return [(self.__segmented, self.indices)]
    if self.__ijv is None:
        return []
    if shape is None:
        shape = self.__shape

    def to_matrix(rows, dims=shape):
        '''Paint (i, j, label) rows into a fresh int16 matrix'''
        if dims is None:
            if self.has_parent_image:
                dims = self.parent_image.pixel_data.shape
            elif len(rows) == 0:
                # degenerate case, no parent info and no labels
                dims = (1, 1)
            else:
                # leave a border of zeros to the right and below
                dims = np.max(rows[:, :2], 0) + 2
        matrix = np.zeros(dims, np.int16)
        if rows.shape[0] > 0:
            matrix[rows[:, 0], rows[:, 1]] = rows[:, 2]
        return matrix

    all_rows = self.__ijv
    if len(all_rows) == 0:
        return [(to_matrix(all_rows), self.indices)]

    # Sort by i, then j, then label so rows for one pixel are adjacent
    by_position = np.lexsort(
        (all_rows[:, 2], all_rows[:, 1], all_rows[:, 0]))
    sorted_rows = all_rows[by_position]
    #
    # A row with the same (i, j) as its successor marks an overlap
    #
    same_as_next = np.all(sorted_rows[:-1, :2] == sorted_rows[1:, :2], 1)
    #
    # Index of the first row of every pixel, plus a terminating sentinel;
    # successive differences give the number of labels at each pixel
    #
    run_starts = np.argwhere(
        np.hstack(([True], ~same_as_next, [True]))).flatten()
    run_lengths = run_starts[1:] - run_starts[:-1]
    run_indexer = Indexes(run_lengths)
    #
    # Keep only the pixels that carry more than one label
    #
    multi = run_lengths > 1
    sorted_rows = sorted_rows[multi[run_indexer.rev_idx], :]
    run_lengths = run_lengths[multi]
    if len(run_lengths) == 0:
        return [(to_matrix(all_rows), self.indices)]
    #
    # A pixel with n labels contributes n * (n - 1) ordered label pairs
    #
    pair_offsets = all_pairs(np.max(run_lengths))
    pairs_per_run = run_lengths * (run_lengths - 1)
    row_indexer = Indexes(run_lengths)
    pair_indexer = Indexes(pairs_per_run)
    run_base = row_indexer.fwd_idx[pair_indexer.rev_idx]
    which_pair = pair_indexer.idx[0]
    first = sorted_rows[run_base + pair_offsets[which_pair, 0], 2]
    second = sorted_rows[run_base + pair_offsets[which_pair, 1], 2]
    #
    # Group the pairs by their first label and drop repeated pairs
    #
    pair_order = np.lexsort((second, first))
    first = first[pair_order]
    second = second[pair_order]
    distinct = np.hstack(
        ([True],
         (first[1:] != first[:-1]) | (second[1:] != second[:-1])))
    first = first[distinct]
    second = second[distinct]
    #
    # Number of distinct overlapping neighbors per label, padded so that
    # every label number has an entry. See cpmorphology.color_labels and
    # Welsh, "An upper bound for the chromatic number of a graph and
    # its application to timetabling problems", The Computer Journal, 10(1)
    # p 85 (1967)
    #
    overlap_counts = np.bincount(first)
    n_objects = len(self.indices)
    shortfall = n_objects + 1 - len(overlap_counts)
    if shortfall > 0:
        overlap_counts = np.hstack((overlap_counts, [0] * shortfall))
    #
    # Offset of each label's neighbor list within "second"
    #
    offsets = np.cumsum(overlap_counts) - overlap_counts
    #
    # Color per label: 0 = not yet assigned, 1 for labels without
    # overlaps, -1 for labels whose area is zero
    #
    v_color = np.zeros(len(overlap_counts), int)
    v_color[overlap_counts == 0] = 1
    v_color[1:][self.areas == 0] = -1
    #
    # Visit the overlapping labels sorted by overlap count (ascending,
    # as produced by lexsort), ties broken by label number
    #
    visit = np.lexsort((np.arange(len(overlap_counts)), overlap_counts))
    visit = visit[overlap_counts[visit] > 0]
    for label in visit:
        start = offsets[label]
        neighbors = second[start:start + overlap_counts[label]]
        used = np.unique(v_color[neighbors])
        if used[0] == 0:
            if len(used) == 1:
                # no neighbor has a color yet - take color 1
                v_color[label] = 1
                continue
            # ignore the neighbors that are still unassigned
            used = used[1:]
        # The first position where 1..n differs from the sorted used
        # colors is the lowest free color; otherwise take the next one
        candidates = np.arange(1, len(used) + 1)
        free = candidates[used != candidates]
        v_color[label] = free[0] if len(free) else len(used) + 1
    #
    # Build one matrix per color, skipping the absent (-1) labels
    #
    planes = []
    for color in np.unique(v_color):
        if color == -1:
            continue
        members = all_rows[v_color[all_rows[:, 2]] == color]
        member_labels = np.arange(1, len(v_color))[v_color[1:] == color]
        planes.append((to_matrix(members), member_labels))
    return planes
def __convert_sparse_to_dense(self):
    '''Build the dense labeling from the sparse one and cache it

    The sparse representation (self.get_sparse()) is a record array with
    one field per coordinate axis present plus a label field. Because one
    position may carry several labels, every label is assigned a "color"
    so that labels that share a position get different colors; the color
    selects the plane (first axis) of the dense array that the label is
    written to.

    returns whatever self.__set_or_cache_dense returns for the dense array
    (and, when overlaps exist, the list of label numbers per plane).
    '''
    from cellprofiler.utilities.hdf5_dict import HDF5ObjectSet
    sparse = self.get_sparse()
    if len(sparse) == 0:
        return self.__set_or_cache_dense(
            np.zeros([1] + list(self.shape), np.uint16))
    #
    # The code below assigns a "color" to each label so that no
    # two overlapping labels have the same color
    #
    # positional_columns: one entry per axis in HDF5ObjectSet.AXES, the
    #                     coordinate column if present, else a constant 0
    # available_columns:  only the coordinate columns actually present
    # lexsort_columns:    sort keys, least significant first (the label)
    #
    positional_columns = []
    available_columns = []
    lexsort_columns = []
    for axis in HDF5ObjectSet.AXES:
        if axis in sparse.dtype.fields:
            positional_columns.append(sparse[axis])
            available_columns.append(sparse[axis])
            lexsort_columns.insert(0, sparse[axis])
        else:
            positional_columns.append(0)
    labels = sparse[HDF5ObjectSet.AXIS_LABELS]
    lexsort_columns.insert(0, labels)
    sort_order = np.lexsort(lexsort_columns)
    n_labels = np.max(labels)
    #
    # Find the first of a run that's different from the rest: a break
    # occurs wherever any coordinate differs between consecutive rows
    #
    mask = available_columns[0][sort_order[:-1]] != \
        available_columns[0][sort_order[1:]]
    for column in available_columns[1:]:
        mask = mask | (column[sort_order[:-1]] != column[sort_order[1:]])
    breaks = np.hstack(([0], np.where(mask)[0] + 1, [len(labels)]))
    firsts = breaks[:-1]
    counts = breaks[1:] - firsts
    #
    # Eliminate the locations that are singly labeled
    #
    mask = counts > 1
    firsts = firsts[mask]
    counts = counts[mask]
    if len(counts) == 0:
        # No overlaps: everything fits in a single plane. The index must
        # be a tuple - NumPy no longer treats a list of per-axis indices
        # as a multidimensional index.
        dense = np.zeros([1] + list(self.shape), labels.dtype)
        dense[tuple([0] + positional_columns)] = labels
        return self.__set_or_cache_dense(dense)
    #
    # There are n * n-1 pairs for each coordinate (n = # labels)
    # n = 1 -> 0 pairs, n = 2 -> 2 pairs, n = 3 -> 6 pairs
    #
    pairs = all_pairs(np.max(counts))
    pair_counts = counts * (counts - 1)
    #
    # Create an indexer for the outputs (first and second of the pairs)
    #
    # Remember idx points into sort_order which points into labels
    # to get the nth label, grouped into consecutive positions.
    #
    output_indexer = Indexes(pair_counts)
    #
    # The start of the run of overlaps and the offsets
    #
    run_starts = firsts[output_indexer.rev_idx]
    offs = pairs[output_indexer.idx[0], :]
    first = labels[sort_order[run_starts + offs[:, 0]]]
    second = labels[sort_order[run_starts + offs[:, 1]]]
    #
    # Sort the pairs so that each label's neighbors are consecutive.
    # The sort must be applied before looking for duplicates: equal
    # pairs are only adjacent in the sorted arrays.
    #
    pair_sort_order = np.lexsort((second, first))
    first = first[pair_sort_order]
    second = second[pair_sort_order]
    #
    # Eliminate dupes and pairs of a label with itself
    #
    to_keep = np.hstack(
        ([True],
         (first[1:] != first[:-1]) | (second[1:] != second[:-1])))
    to_keep = to_keep & (first != second)
    first = first[to_keep]
    second = second[to_keep]
    #
    # Bincount each label to get its number of distinct neighbors.
    # See cpmorphology.color_labels and
    # Welsh, "An upper bound for the chromatic number of a graph and
    # its application to timetabling problems", The Computer Journal, 10(1)
    # p 85 (1967)
    #
    overlap_counts = np.bincount(first.astype(np.int32))
    #
    # The index to the i'th label's stuff
    #
    indexes = np.cumsum(overlap_counts) - overlap_counts
    #
    # A vector of a current color per label. All non-overlapping
    # objects are assigned to plane 1; entry 0 (background) gets 0
    #
    v_color = np.ones(n_labels + 1, int)
    v_color[0] = 0
    #
    # Clear all overlapping objects
    #
    v_color[np.unique(first)] = 0
    #
    # Visit the overlapping labels sorted by neighbor count (ascending,
    # as produced by lexsort), ties broken by label number.
    # NOTE(review): the original comment said "most overlapping to
    # least", which is the opposite direction - confirm the intent.
    #
    ol_labels = np.where(overlap_counts > 0)[0]
    processing_order = np.lexsort((ol_labels, overlap_counts[ol_labels]))
    for index in ol_labels[processing_order]:
        neighbors = second[
            indexes[index]:indexes[index] + overlap_counts[index]]
        colors = np.unique(v_color[neighbors])
        if colors[0] == 0:
            if len(colors) == 1:
                # all unassigned - put self in group 1
                v_color[index] = 1
                continue
            # otherwise, ignore the unprocessed group and continue
            colors = colors[1:]
        # Match a range against the colors array - the first place
        # they don't match is the first color we can use
        crange = np.arange(1, len(colors) + 1)
        misses = crange[colors != crange]
        if len(misses):
            color = misses[0]
        else:
            color = len(colors) + 1
        v_color[index] = color
    #
    # Create the dense matrix by using the color to address the
    # plane into which we place each label
    #
    dense = np.zeros([np.max(v_color)] + list(self.shape), labels.dtype)
    slices = tuple([v_color[labels] - 1] + positional_columns)
    dense[slices] = labels
    indices = [
        np.where(v_color == i)[0]
        for i in range(1, dense.shape[0] + 1)]
    return self.__set_or_cache_dense(dense, indices)
def get_labels(self, shape=None):
    '''Return label matrices in which no two labels share a pixel

    The IJV representation allows one pixel to carry several labels, which
    a single labels matrix cannot express. The labels are therefore split
    into groups ("colors") so that labels within a group never overlap, and
    one matrix is produced per group.

    shape - shape of the matrices to produce. If None, the stored shape is
            used; failing that, the parent image's shape; failing that, a
            shape derived from the largest coordinates present.

    returns a list of (labels matrix, label numbers in that matrix) tuples
    '''
    if self.__segmented is not None:
        return [(self.__segmented, self.indices)]
    if self.__ijv is None:
        return []
    if shape is None:
        shape = self.__shape

    def to_matrix(rows, dims=shape):
        '''Paint (i, j, label) rows into a fresh int16 matrix'''
        if dims is None:
            if self.has_parent_image:
                dims = self.parent_image.pixel_data.shape
            elif len(rows) == 0:
                # degenerate case, no parent info and no labels
                dims = (1, 1)
            else:
                # leave a border of zeros to the right and below
                dims = np.max(rows[:, :2], 0) + 2
        matrix = np.zeros(dims, np.int16)
        if rows.shape[0] > 0:
            matrix[rows[:, 0], rows[:, 1]] = rows[:, 2]
        return matrix

    all_rows = self.__ijv
    if len(all_rows) == 0:
        return [(to_matrix(all_rows), self.indices)]

    # Sort by i, then j, then label so rows for one pixel are adjacent
    by_position = np.lexsort(
        (all_rows[:, 2], all_rows[:, 1], all_rows[:, 0]))
    sorted_rows = all_rows[by_position]
    #
    # A row with the same (i, j) as its successor marks an overlap
    #
    same_as_next = np.all(sorted_rows[:-1, :2] == sorted_rows[1:, :2], 1)
    #
    # Index of the first row of every pixel, plus a terminating sentinel;
    # successive differences give the number of labels at each pixel
    #
    run_starts = np.argwhere(
        np.hstack(([True], ~same_as_next, [True]))).flatten()
    run_lengths = run_starts[1:] - run_starts[:-1]
    run_indexer = Indexes(run_lengths)
    #
    # Keep only the pixels that carry more than one label
    #
    multi = run_lengths > 1
    sorted_rows = sorted_rows[multi[run_indexer.rev_idx], :]
    run_lengths = run_lengths[multi]
    if len(run_lengths) == 0:
        return [(to_matrix(all_rows), self.indices)]
    #
    # A pixel with n labels contributes n * (n - 1) ordered label pairs
    #
    pair_offsets = all_pairs(np.max(run_lengths))
    pairs_per_run = run_lengths * (run_lengths - 1)
    row_indexer = Indexes(run_lengths)
    pair_indexer = Indexes(pairs_per_run)
    run_base = row_indexer.fwd_idx[pair_indexer.rev_idx]
    which_pair = pair_indexer.idx[0]
    first = sorted_rows[run_base + pair_offsets[which_pair, 0], 2]
    second = sorted_rows[run_base + pair_offsets[which_pair, 1], 2]
    #
    # Group the pairs by their first label and drop repeated pairs
    #
    pair_order = np.lexsort((second, first))
    first = first[pair_order]
    second = second[pair_order]
    distinct = np.hstack(
        ([True],
         (first[1:] != first[:-1]) | (second[1:] != second[:-1])))
    first = first[distinct]
    second = second[distinct]
    #
    # Number of distinct overlapping neighbors per label, padded so that
    # every label number has an entry. See cpmorphology.color_labels and
    # Welsh, "An upper bound for the chromatic number of a graph and
    # its application to timetabling problems", The Computer Journal, 10(1)
    # p 85 (1967)
    #
    overlap_counts = np.bincount(first)
    n_objects = len(self.indices)
    shortfall = n_objects + 1 - len(overlap_counts)
    if shortfall > 0:
        overlap_counts = np.hstack((overlap_counts, [0] * shortfall))
    #
    # Offset of each label's neighbor list within "second"
    #
    offsets = np.cumsum(overlap_counts) - overlap_counts
    #
    # Color per label: 0 = not yet assigned, 1 for labels without
    # overlaps, -1 for labels whose area is zero
    #
    v_color = np.zeros(len(overlap_counts), int)
    v_color[overlap_counts == 0] = 1
    v_color[1:][self.areas == 0] = -1
    #
    # Visit the overlapping labels sorted by overlap count (ascending,
    # as produced by lexsort), ties broken by label number
    #
    visit = np.lexsort((np.arange(len(overlap_counts)), overlap_counts))
    visit = visit[overlap_counts[visit] > 0]
    for label in visit:
        start = offsets[label]
        neighbors = second[start:start + overlap_counts[label]]
        used = np.unique(v_color[neighbors])
        if used[0] == 0:
            if len(used) == 1:
                # no neighbor has a color yet - take color 1
                v_color[label] = 1
                continue
            # ignore the neighbors that are still unassigned
            used = used[1:]
        # The first position where 1..n differs from the sorted used
        # colors is the lowest free color; otherwise take the next one
        candidates = np.arange(1, len(used) + 1)
        free = candidates[used != candidates]
        v_color[label] = free[0] if len(free) else len(used) + 1
    #
    # Build one matrix per color, skipping the absent (-1) labels
    #
    planes = []
    for color in np.unique(v_color):
        if color == -1:
            continue
        members = all_rows[v_color[all_rows[:, 2]] == color]
        member_labels = np.arange(1, len(v_color))[v_color[1:] == color]
        planes.append((to_matrix(members), member_labels))
    return planes
def __convert_sparse_to_dense(self):
    '''Build the dense labeling from the sparse one and cache it

    The sparse representation (self.get_sparse()) is a record array with
    one field per coordinate axis present plus a label field. Because one
    position may carry several labels, every label is assigned a "color"
    so that labels that share a position get different colors; the color
    selects the plane (first axis) of the dense array that the label is
    written to.

    returns whatever self.__set_or_cache_dense returns for the dense array
    (and, when overlaps exist, the list of label numbers per plane).
    '''
    from cellprofiler.utilities.hdf5_dict import HDF5ObjectSet
    sparse = self.get_sparse()
    if len(sparse) == 0:
        return self.__set_or_cache_dense(
            np.zeros([1] + list(self.shape), np.uint16))
    #
    # The code below assigns a "color" to each label so that no
    # two overlapping labels have the same color
    #
    # positional_columns: one entry per axis in HDF5ObjectSet.AXES, the
    #                     coordinate column if present, else a constant 0
    # available_columns:  only the coordinate columns actually present
    # lexsort_columns:    sort keys, least significant first (the label)
    #
    positional_columns = []
    available_columns = []
    lexsort_columns = []
    for axis in HDF5ObjectSet.AXES:
        if axis in sparse.dtype.fields:
            positional_columns.append(sparse[axis])
            available_columns.append(sparse[axis])
            lexsort_columns.insert(0, sparse[axis])
        else:
            positional_columns.append(0)
    labels = sparse[HDF5ObjectSet.AXIS_LABELS]
    lexsort_columns.insert(0, labels)
    sort_order = np.lexsort(lexsort_columns)
    n_labels = np.max(labels)
    #
    # Find the first of a run that's different from the rest: a break
    # occurs wherever any coordinate differs between consecutive rows
    #
    mask = available_columns[0][sort_order[:-1]] != \
        available_columns[0][sort_order[1:]]
    for column in available_columns[1:]:
        mask = mask | (column[sort_order[:-1]] != column[sort_order[1:]])
    breaks = np.hstack(([0], np.where(mask)[0] + 1, [len(labels)]))
    firsts = breaks[:-1]
    counts = breaks[1:] - firsts
    #
    # Eliminate the locations that are singly labeled
    #
    mask = counts > 1
    firsts = firsts[mask]
    counts = counts[mask]
    if len(counts) == 0:
        # No overlaps: everything fits in a single plane. The index must
        # be a tuple - NumPy no longer treats a list of per-axis indices
        # as a multidimensional index.
        dense = np.zeros([1] + list(self.shape), labels.dtype)
        dense[tuple([0] + positional_columns)] = labels
        return self.__set_or_cache_dense(dense)
    #
    # There are n * n-1 pairs for each coordinate (n = # labels)
    # n = 1 -> 0 pairs, n = 2 -> 2 pairs, n = 3 -> 6 pairs
    #
    pairs = all_pairs(np.max(counts))
    pair_counts = counts * (counts - 1)
    #
    # Create an indexer for the outputs (first and second of the pairs)
    #
    # Remember idx points into sort_order which points into labels
    # to get the nth label, grouped into consecutive positions.
    #
    output_indexer = Indexes(pair_counts)
    #
    # The start of the run of overlaps and the offsets
    #
    run_starts = firsts[output_indexer.rev_idx]
    offs = pairs[output_indexer.idx[0], :]
    first = labels[sort_order[run_starts + offs[:, 0]]]
    second = labels[sort_order[run_starts + offs[:, 1]]]
    #
    # Sort the pairs so that each label's neighbors are consecutive.
    # The sort must be applied before looking for duplicates: equal
    # pairs are only adjacent in the sorted arrays.
    #
    pair_sort_order = np.lexsort((second, first))
    first = first[pair_sort_order]
    second = second[pair_sort_order]
    #
    # Eliminate dupes and pairs of a label with itself
    #
    to_keep = np.hstack(
        ([True],
         (first[1:] != first[:-1]) | (second[1:] != second[:-1])))
    to_keep = to_keep & (first != second)
    first = first[to_keep]
    second = second[to_keep]
    #
    # Bincount each label to get its number of distinct neighbors.
    # See cpmorphology.color_labels and
    # Welsh, "An upper bound for the chromatic number of a graph and
    # its application to timetabling problems", The Computer Journal, 10(1)
    # p 85 (1967)
    #
    overlap_counts = np.bincount(first.astype(np.int32))
    #
    # The index to the i'th label's stuff
    #
    indexes = np.cumsum(overlap_counts) - overlap_counts
    #
    # A vector of a current color per label. All non-overlapping
    # objects are assigned to plane 1; entry 0 (background) gets 0
    #
    v_color = np.ones(n_labels + 1, int)
    v_color[0] = 0
    #
    # Clear all overlapping objects
    #
    v_color[np.unique(first)] = 0
    #
    # Visit the overlapping labels sorted by neighbor count (ascending,
    # as produced by lexsort), ties broken by label number.
    # NOTE(review): the original comment said "most overlapping to
    # least", which is the opposite direction - confirm the intent.
    #
    ol_labels = np.where(overlap_counts > 0)[0]
    processing_order = np.lexsort((ol_labels, overlap_counts[ol_labels]))
    for index in ol_labels[processing_order]:
        neighbors = second[
            indexes[index]:indexes[index] + overlap_counts[index]]
        colors = np.unique(v_color[neighbors])
        if colors[0] == 0:
            if len(colors) == 1:
                # all unassigned - put self in group 1
                v_color[index] = 1
                continue
            # otherwise, ignore the unprocessed group and continue
            colors = colors[1:]
        # Match a range against the colors array - the first place
        # they don't match is the first color we can use
        crange = np.arange(1, len(colors) + 1)
        misses = crange[colors != crange]
        if len(misses):
            color = misses[0]
        else:
            color = len(colors) + 1
        v_color[index] = color
    #
    # Create the dense matrix by using the color to address the
    # plane into which we place each label
    #
    dense = np.zeros([np.max(v_color)] + list(self.shape), labels.dtype)
    slices = tuple([v_color[labels] - 1] + positional_columns)
    dense[slices] = labels
    indices = [
        np.where(v_color == i)[0]
        for i in range(1, dense.shape[0] + 1)]
    return self.__set_or_cache_dense(dense, indices)