def __init__(
    self,
    cg: "ChunkedGraph",
    *,
    user_id: str,
    source_ids: Sequence[np.uint64],
    sink_ids: Sequence[np.uint64],
    source_coords: Sequence[Sequence[int]],
    sink_coords: Sequence[Sequence[int]],
    bbox_offset: Sequence[int],
) -> None:
    super().__init__(cg, user_id=user_id, source_coords=source_coords, sink_coords=sink_coords)
    self.removed_edges = None  # Calculated from coordinates and IDs

    self.source_ids = np.atleast_1d(source_ids).astype(basetypes.NODE_ID)
    self.sink_ids = np.atleast_1d(sink_ids).astype(basetypes.NODE_ID)
    self.bbox_offset = np.atleast_1d(bbox_offset).astype(basetypes.COORDINATES)

    if np.any(np.in1d(self.sink_ids, self.source_ids)):
        raise cg_exceptions.PreconditionError(
            "One or more supervoxels exist as both sink and source."
        )

    for supervoxel_id in itertools.chain(self.source_ids, self.sink_ids):
        layer = self.cg.get_chunk_layer(supervoxel_id)
        if layer != 1:
            raise cg_exceptions.PreconditionError(
                f"Supervoxel expected, but {supervoxel_id} is a layer {layer} node."
            )
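# A minimal, self-contained sketch (not part of the ChunkedGraph API) of the
# precondition enforced above: the sink and source supervoxel IDs must be disjoint.
# The IDs below are made up for illustration.
import numpy as np

source_ids = np.atleast_1d([101, 102, 103]).astype(np.uint64)
sink_ids = np.atleast_1d([103, 104]).astype(np.uint64)

# np.in1d reports, for each sink ID, whether it also appears among the sources;
# any overlap would make the requested split ambiguous.
overlap = np.in1d(sink_ids, source_ids)
assert overlap.any()  # 103 is both a sink and a source -> would raise PreconditionError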
def __init__(
    self,
    cg: "ChunkedGraph",
    *,
    user_id: str,
    added_edges: Sequence[Sequence[np.uint64]],
    source_coords: Optional[Sequence[Sequence[int]]] = None,
    sink_coords: Optional[Sequence[Sequence[int]]] = None,
    affinities: Optional[Sequence[np.float32]] = None,
) -> None:
    super().__init__(cg, user_id=user_id, source_coords=source_coords, sink_coords=sink_coords)
    self.added_edges = np.atleast_2d(added_edges).astype(basetypes.NODE_ID)

    self.affinities = None
    if affinities is not None:
        self.affinities = np.atleast_1d(affinities).astype(basetypes.EDGE_AFFINITY)
        if self.affinities.size == 0:
            self.affinities = None

    if np.any(np.equal(self.added_edges[:, 0], self.added_edges[:, 1])):
        raise cg_exceptions.PreconditionError(
            "Requested merge operation contains at least one self-loop."
        )

    for supervoxel_id in self.added_edges.ravel():
        layer = self.cg.get_chunk_layer(supervoxel_id)
        if layer != 1:
            raise cg_exceptions.PreconditionError(
                f"Supervoxel expected, but {supervoxel_id} is a layer {layer} node."
            )
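# A standalone sketch (assumed inputs, not project code) of how the merge edge list is
# normalized and checked for self-loops: np.atleast_2d lets callers pass either a single
# edge or a list of edges, and comparing the two columns catches edges that connect a
# supervoxel to itself.
import numpy as np

added_edges = np.atleast_2d([7000, 7001]).astype(np.uint64)  # single edge -> shape (1, 2)
assert added_edges.shape == (1, 2)

bad_edges = np.atleast_2d([[7000, 7001], [7002, 7002]]).astype(np.uint64)
has_self_loop = np.any(np.equal(bad_edges[:, 0], bad_edges[:, 1]))
assert has_self_loop  # the second edge is a self-loop -> would raise PreconditionError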
def _update_root_ids(self) -> np.ndarray:
    root_ids = np.unique(self.cg.get_roots(self.removed_edges.ravel()))
    if len(root_ids) > 1:
        raise cg_exceptions.PreconditionError(
            "All supervoxels must belong to the same object. Already split?"
        )
    return root_ids
def _update_root_ids(self) -> np.ndarray:
    sink_and_source_ids = np.concatenate((self.source_ids, self.sink_ids))
    root_ids = np.unique(self.cg.get_roots(sink_and_source_ids))
    if len(root_ids) > 1:
        raise cg_exceptions.PreconditionError(
            "All supervoxels must belong to the same object. Already split?"
        )
    return root_ids
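# A toy sketch of the root-uniqueness precondition: before a split, every source and
# sink supervoxel must resolve to the same root ID. The dictionary below stands in for
# cg.get_roots(), which is not reproduced here; all IDs are made up.
import numpy as np

toy_roots = {1: 900, 2: 900, 3: 901}  # supervoxel -> root


def toy_get_roots(ids):
    return np.array([toy_roots[i] for i in ids], dtype=np.uint64)


sink_and_source_ids = np.concatenate((np.array([1, 2]), np.array([3])))
root_ids = np.unique(toy_get_roots(sink_and_source_ids))
assert len(root_ids) > 1  # supervoxels span two objects -> would raise PreconditionError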
def __init__(
    self,
    cg: "ChunkedGraph",
    *,
    user_id: str,
    removed_edges: Sequence[Sequence[np.uint64]],
    source_coords: Optional[Sequence[Sequence[int]]] = None,
    sink_coords: Optional[Sequence[Sequence[int]]] = None,
) -> None:
    super().__init__(cg, user_id=user_id, source_coords=source_coords, sink_coords=sink_coords)
    self.removed_edges = np.atleast_2d(removed_edges).astype(basetypes.NODE_ID)

    if np.any(np.equal(self.removed_edges[:, 0], self.removed_edges[:, 1])):
        raise cg_exceptions.PreconditionError(
            "Requested split operation contains at least one self-loop."
        )

    for supervoxel_id in self.removed_edges.ravel():
        layer = self.cg.get_chunk_layer(supervoxel_id)
        if layer != 1:
            raise cg_exceptions.PreconditionError(
                f"Supervoxel expected, but {supervoxel_id} is a layer {layer} node."
            )
def _filter_graph_connected_components(self):
    """
    Filter out connected components in the graph
    that are not involved in the local mincut
    """
    ccs = flatgraph_utils.connected_components(self.weighted_graph)

    removed = self.weighted_graph.new_vertex_property("bool")
    removed.a = False
    if len(ccs) > 1:
        for cc in ccs:
            # If the connected component contains no sources or no sinks,
            # remove its nodes from the mincut computation
            if not (
                np.any(np.in1d(self.source_graph_ids, cc))
                and np.any(np.in1d(self.sink_graph_ids, cc))
            ):
                for node_id in cc:
                    removed[node_id] = True

    self.weighted_graph.set_vertex_filter(removed, inverted=True)
    pruned_graph = graph_tool.Graph(self.weighted_graph, prune=True)

    # Test that there is only one connected component left
    ccs = flatgraph_utils.connected_components(pruned_graph)

    if len(ccs) > 1:
        if self.logger is not None:
            self.logger.warning(
                "Not all sinks and sources are within the same (local) "
                "connected component"
            )
        raise cg_exceptions.PreconditionError(
            "Not all sinks and sources are within the same (local) "
            "connected component"
        )
    elif len(ccs) == 0:
        raise cg_exceptions.PreconditionError(
            "Sinks and sources are not connected through the local graph. "
            "Please try a different set of vertices to perform the mincut."
        )
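# A self-contained sketch (plain numpy, no graph_tool) of the component-filtering idea
# used above: a connected component is only kept for the mincut if it contains at least
# one source *and* at least one sink. Component contents and ID sets are made up.
import numpy as np

components = [np.array([0, 1, 2]), np.array([3, 4]), np.array([5, 6, 7])]
source_graph_ids = np.array([1])
sink_graph_ids = np.array([2, 6])

kept = [
    cc for cc in components
    if np.any(np.in1d(source_graph_ids, cc)) and np.any(np.in1d(sink_graph_ids, cc))
]
assert len(kept) == 1  # only the first component holds both a source and a sink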
def _sink_and_source_connectivity_sanity_check(self, cut_edge_set):
    """
    Similar to _gt_mincut_sanity_check, except we do the check again *after*
    removing the fake infinite-affinity edges.
    """
    time_start = time.time()
    for cut_edge in cut_edge_set:
        # There may be more than one edge from vertex cut_edge[0] to vertex
        # cut_edge[1]; remove them all
        parallel_edges = self.weighted_graph.edge(
            cut_edge[0], cut_edge[1], all_edges=True
        )
        for edge_to_remove in parallel_edges:
            self.edges_to_remove[edge_to_remove] = True

    self.weighted_graph.set_edge_filter(self.edges_to_remove, inverted=True)
    ccs_test_post_cut = flatgraph_utils.connected_components(self.weighted_graph)

    # Make sure sinks and sources are among each other and not in different sets
    # after removing the cut edges and the fake infinity edges
    illegal_split = False
    try:
        for cc in ccs_test_post_cut:
            if np.any(np.in1d(self.source_graph_ids, cc)):
                assert np.all(np.in1d(self.source_graph_ids, cc))
                assert ~np.any(np.in1d(self.sink_graph_ids, cc))

            if np.any(np.in1d(self.sink_graph_ids, cc)):
                assert np.all(np.in1d(self.sink_graph_ids, cc))
                assert ~np.any(np.in1d(self.source_graph_ids, cc))
    except AssertionError:
        if self.split_preview:
            # If we are performing a split preview, we allow these illegal splits,
            # but return a flag so a message can be shown to the user
            illegal_split = True
        else:
            raise cg_exceptions.PreconditionError(
                "Failed to find a cut that separated the sources from the sinks. "
                "Please try another cut that partitions the sets cleanly if possible. "
                "If there is a clear path between all the supervoxels in each set, "
                "that helps the mincut algorithm."
            )

    dt = time.time() - time_start
    if self.logger is not None:
        self.logger.debug("Verifying local graph: %.2fms" % (dt * 1000))

    return ccs_test_post_cut, illegal_split
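# A reduced sketch of the post-cut sanity check: after removing the cut edges, all
# sources must fall into one connected component and all sinks into a different one.
# Component contents and ID sets are invented for illustration.
import numpy as np

ccs_post_cut = [np.array([0, 1]), np.array([2, 3])]
source_graph_ids = np.array([0, 1])
sink_graph_ids = np.array([2])

for cc in ccs_post_cut:
    if np.any(np.in1d(source_graph_ids, cc)):
        # every source must be in this component, and no sink may be
        assert np.all(np.in1d(source_graph_ids, cc))
        assert not np.any(np.in1d(sink_graph_ids, cc))
    if np.any(np.in1d(sink_graph_ids, cc)):
        assert np.all(np.in1d(sink_graph_ids, cc))
        assert not np.any(np.in1d(source_graph_ids, cc))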
def test_failed_graph_operation(mocker, root_lock_tracker):
    """Ensure that root locks get released after a successful root lock
    acquisition followed by an *unsuccessful* graph operation"""
    fake_operation_id = big_uint64()
    fake_locked_root_ids = np.array((big_uint64(), big_uint64()))

    cg = mocker.MagicMock()
    cg.get_unique_operation_id = mocker.MagicMock(return_value=fake_operation_id)
    cg.lock_root_loop = mocker.MagicMock(
        return_value=(True, fake_locked_root_ids),
        side_effect=root_lock_tracker.add_locks,
    )
    cg.unlock_root = mocker.MagicMock(
        return_value=True, side_effect=root_lock_tracker.remove_lock
    )

    with pytest.raises(cg_exceptions.PreconditionError):
        with RootLock(cg, fake_locked_root_ids):
            raise cg_exceptions.PreconditionError("Something went wrong")

    assert not root_lock_tracker.active_locks[fake_operation_id]
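# A minimal, hypothetical sketch (not the project's RootLock) of the pattern the test
# above verifies: a context manager that releases its locks in __exit__, so the release
# happens even when the wrapped graph operation raises. The cg methods used here mirror
# the mocks in the test; the class and its exact signatures are illustrative.
class ToyRootLock:
    def __init__(self, cg, root_ids):
        self.cg = cg
        self.root_ids = root_ids
        self.operation_id = None

    def __enter__(self):
        self.operation_id = self.cg.get_unique_operation_id()
        success, self.root_ids = self.cg.lock_root_loop(self.root_ids, self.operation_id)
        if not success:
            raise RuntimeError("Could not acquire root locks")
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Runs on both normal exit and exceptions raised in the with-body
        for root_id in self.root_ids:
            self.cg.unlock_root(root_id, self.operation_id)
        return False  # do not swallow exceptions from the with-body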
def handle_l2_chunk_children(table_id, chunk_id, as_array):
    current_app.request_type = "l2_chunk_children"
    current_app.table_id = table_id

    # Convert seconds since epoch to UTC datetime
    try:
        timestamp = float(request.args.get("timestamp", time.time()))
        timestamp = datetime.fromtimestamp(timestamp, UTC)
    except (TypeError, ValueError) as e:
        raise cg_exceptions.BadRequest(
            "Timestamp parameter is not a valid unix timestamp"
        ) from e

    # Call ChunkedGraph
    cg = app_utils.get_cg(table_id)
    chunk_layer = cg.get_chunk_layer(chunk_id)
    if chunk_layer != 2:
        raise cg_exceptions.PreconditionError(
            f"This function only accepts level 2 chunks; the requested chunk is a "
            f"level {chunk_layer} chunk"
        )

    rr_chunk = cg.range_read_chunk(
        chunk_id=np.uint64(chunk_id),
        columns=column_keys.Hierarchy.Child,
        time_stamp=timestamp,
    )

    if as_array:
        l2_chunk_array = []
        for l2 in rr_chunk:
            svs = rr_chunk[l2][0].value
            for sv in svs:
                l2_chunk_array.extend([l2, sv])
        return np.array(l2_chunk_array)
    else:
        # Store in a dict of keys to arrays to remove the reliance on Bigtable
        l2_chunk_dict = {}
        for k in rr_chunk:
            l2_chunk_dict[k] = rr_chunk[k][0].value
        return l2_chunk_dict
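# A standalone sketch of the timestamp handling used in the handler above: a
# seconds-since-epoch value (e.g. taken from a query string) is parsed and converted to
# a timezone-aware UTC datetime, with malformed input mapped to a client-facing error.
# The helper name is illustrative.
import time
from datetime import datetime, timezone


def parse_unix_timestamp(raw: str) -> datetime:
    try:
        return datetime.fromtimestamp(float(raw), timezone.utc)
    except (TypeError, ValueError) as err:
        raise ValueError("Timestamp parameter is not a valid unix timestamp") from err


print(parse_unix_timestamp(str(time.time())))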
def read_byte_rows(
    self,
    start_key: Optional[bytes] = None,
    end_key: Optional[bytes] = None,
    end_key_inclusive: bool = False,
    row_keys: Optional[Iterable[bytes]] = None,
    columns: Optional[Union[Iterable[column_keys._Column], column_keys._Column]] = None,
    start_time: Optional[datetime.datetime] = None,
    end_time: Optional[datetime.datetime] = None,
    end_time_inclusive: bool = False,
) -> Dict[bytes, Union[Dict[column_keys._Column, List[bigtable.row_data.Cell]],
                       List[bigtable.row_data.Cell]]]:
    """Main function for reading a row range or non-contiguous row sets from Bigtable
    using `bytes` keys.

    Keyword Arguments:
        start_key {Optional[bytes]} -- The first row to be read, ignored if `row_keys`
            is set. Must be provided together with `end_key` if `row_keys` is None.
            (default: {None})
        end_key {Optional[bytes]} -- The end of the row range, ignored if `row_keys`
            is set. Must be provided together with `start_key` if `row_keys` is None.
            (default: {None})
        end_key_inclusive {bool} -- Whether or not `end_key` itself should be included
            in the request, ignored if `row_keys` is set or `end_key` is None.
            (default: {False})
        row_keys {Optional[Iterable[bytes]]} -- An `Iterable` containing possibly
            non-contiguous row keys. Takes precedence over `start_key` and `end_key`.
            (default: {None})
        columns {Optional[Union[Iterable[column_keys._Column], column_keys._Column]]} --
            Optional filtering by columns to speed up the query. If `columns` is a
            single column (not iterable), the column key will be omitted from the
            result. (default: {None})
        start_time {Optional[datetime.datetime]} -- Ignore cells with timestamp before
            `start_time`. If None, no lower bound. (default: {None})
        end_time {Optional[datetime.datetime]} -- Ignore cells with timestamp after
            `end_time`. If None, no upper bound. (default: {None})
        end_time_inclusive {bool} -- Whether or not `end_time` itself should be
            included in the request, ignored if `end_time` is None. (default: {False})

    Returns:
        Dict[bytes, Union[Dict[column_keys._Column, List[bigtable.row_data.Cell]],
                          List[bigtable.row_data.Cell]]] --
            Returns a dictionary of `byte` rows as keys. Their value will be a mapping
            of columns to a List of cells (one cell per timestamp). Each cell has a
            `value` property, which returns the deserialized field, and a `timestamp`
            property, which returns the timestamp as a `datetime.datetime` object.

            If only a single `column_keys._Column` was requested, the List of cells
            will be attached to the row dictionary directly (skipping the column
            dictionary).
    """
    # Create filters: Column and Time
    filter_ = get_time_range_and_column_filter(
        columns=columns,
        start_time=start_time,
        end_time=end_time,
        end_inclusive=end_time_inclusive,
    )

    # Create filters: Rows
    row_set = RowSet()
    if row_keys is not None:
        for row_key in row_keys:
            row_set.add_row_key(row_key)
    elif start_key is not None and end_key is not None:
        row_set.add_row_range_from_keys(
            start_key=start_key,
            start_inclusive=True,
            end_key=end_key,
            end_inclusive=end_key_inclusive,
        )
    else:
        raise cg_exceptions.PreconditionError(
            "Need to provide either a valid set of row keys, or both a start key "
            "and an end key."
        )

    # Bigtable read with retries
    rows = self._execute_read(row_set=row_set, row_filter=filter_)
    return rows
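# A brief sketch of the two ways a row selection can be built for the read above, using
# the google-cloud-bigtable RowSet API that this method relies on. The keys are
# illustrative placeholders.
from google.cloud.bigtable.row_set import RowSet

# Explicit, possibly non-contiguous row keys
row_set_by_keys = RowSet()
for key in (b"row-a", b"row-c", b"row-f"):
    row_set_by_keys.add_row_key(key)

# Contiguous range from start_key to end_key (end exclusive in this sketch)
row_set_by_range = RowSet()
row_set_by_range.add_row_range_from_keys(
    start_key=b"row-a",
    end_key=b"row-z",
    start_inclusive=True,
    end_inclusive=False,
)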